Commit 15b3d7f1 authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman

Merge tag 'misc-habanalabs-next-2021-01-27' of...

Merge tag 'misc-habanalabs-next-2021-01-27' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

This tag contains habanalabs driver changes for v5.12:

- Add feature called "staged command submissions". In this feature,
  the driver allows the user to submit multiple command submissions
  that describe a single pass on the deep learning graph. The driver
  tracks the completion of the entire pass by the last stage CS.

- Update code to support the latest firmware image

- Optimizations and improvements to MMU code:
  - Support page size that is not power-of-2
  - Make the locks scheme simpler
  - mmap areas in device configuration space to userspace

- Security fixes:
  - Make ETR non-secured
  - Remove access to kernel memory through debug-fs interface
  - Remove access through PCI bar to SyncManager register block
    in Gaudi

- Many small bug fixes

* tag 'misc-habanalabs-next-2021-01-27' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (41 commits)
  habanalabs: update to latest hl_boot_if.h spec from F/W
  habanalabs/gaudi: unmask HBM interrupts after handling
  habanalabs: update SyncManager interrupt handling
  habanalabs: fix ETR security issue
  habanalabs: staged submission support
  habanalabs: modify device_idle interface
  habanalabs: add CS completion and timeout properties
  habanalabs: add new mem ioctl op for mapping hw blocks
  habanalabs: fix MMU debugfs related nodes
  habanalabs: add user available interrupt to hw_ip
  habanalabs: always try to use the hint address
  CREDITS: update email address and home address
  habanalabs: update email address in sysfs/debugfs docs
  habanalabs: add security violations dump to debugfs
  habanalabs: ignore F/W BMC errors in case no BMC present
  habanalabs/gaudi: print sync manager SEI interrupt info
  habanalabs: Use 'dma_set_mask_and_coherent()'
  habanalabs/gaudi: remove PCI access to SM block
  habanalabs: add driver support for internal cb scheduling
  habanalabs: increment ctx ref from within a cs allocation
  ...
parents 3a11b0b5 f1aebf5e
......@@ -1244,10 +1244,10 @@ S: 80050-430 - Curitiba - Paraná
S: Brazil
N: Oded Gabbay
E: oded.gabbay@gmail.com
D: HabanaLabs and AMD KFD maintainer
S: 12 Shraga Raphaeli
S: Petah-Tikva, 4906418
E: ogabbay@kernel.org
D: HabanaLabs maintainer
S: 29 Duchifat St.
S: Ra'anana 4372029
S: Israel
N: Kumar Gala
......
What: /sys/kernel/debug/habanalabs/hl<n>/addr
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Sets the device address to be used for read or write through
PCI bar, or the device VA of a host mapped memory to be read or
written directly from the host. The latter option is allowed
......@@ -11,7 +11,7 @@ Description: Sets the device address to be used for read or write through
What: /sys/kernel/debug/habanalabs/hl<n>/clk_gate
Date: May 2020
KernelVersion: 5.8
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Allow the root user to disable/enable in runtime the clock
gating mechanism in Gaudi. Due to how Gaudi is built, the
clock gating needs to be disabled in order to access the
......@@ -34,28 +34,28 @@ Description: Allow the root user to disable/enable in runtime the clock
What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Displays a list with information about the currently allocated
command buffers
What: /sys/kernel/debug/habanalabs/hl<n>/command_submission
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Displays a list with information about the currently active
command submissions
What: /sys/kernel/debug/habanalabs/hl<n>/command_submission_jobs
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Displays a list with detailed information about each JOB (CB) of
each active command submission
What: /sys/kernel/debug/habanalabs/hl<n>/data32
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Allows the root user to read or write directly through the
device's PCI bar. Writing to this file generates a write
transaction while reading from the file generates a read
......@@ -70,7 +70,7 @@ Description: Allows the root user to read or write directly through the
What: /sys/kernel/debug/habanalabs/hl<n>/data64
Date: Jan 2020
KernelVersion: 5.6
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Allows the root user to read or write 64 bit data directly
through the device's PCI bar. Writing to this file generates a
write transaction while reading from the file generates a read
......@@ -85,7 +85,7 @@ Description: Allows the root user to read or write 64 bit data directly
What: /sys/kernel/debug/habanalabs/hl<n>/device
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Enables the root user to set the device to specific state.
Valid values are "disable", "enable", "suspend", "resume".
User can read this property to see the valid values
......@@ -93,28 +93,28 @@ Description: Enables the root user to set the device to specific state.
What: /sys/kernel/debug/habanalabs/hl<n>/engines
Date: Jul 2019
KernelVersion: 5.3
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Displays the status registers values of the device engines and
their derived idle status
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_addr
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Sets I2C device address for I2C transaction that is generated
by the device's CPU
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_bus
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Sets I2C bus address for I2C transaction that is generated by
the device's CPU
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_data
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Triggers an I2C transaction that is generated by the device's
CPU. Writing to this file generates a write transaction while
reading from the file generates a read transcation
......@@ -122,32 +122,32 @@ Description: Triggers an I2C transaction that is generated by the device's
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_reg
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Sets I2C register id for I2C transaction that is generated by
the device's CPU
What: /sys/kernel/debug/habanalabs/hl<n>/led0
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Sets the state of the first S/W led on the device
What: /sys/kernel/debug/habanalabs/hl<n>/led1
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Sets the state of the second S/W led on the device
What: /sys/kernel/debug/habanalabs/hl<n>/led2
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Sets the state of the third S/W led on the device
What: /sys/kernel/debug/habanalabs/hl<n>/mmu
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Displays the hop values and physical address for a given ASID
and virtual address. The user should write the ASID and VA into
the file and then read the file to get the result.
......@@ -157,14 +157,14 @@ Description: Displays the hop values and physical address for a given ASID
What: /sys/kernel/debug/habanalabs/hl<n>/set_power_state
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Sets the PCI power state. Valid values are "1" for D0 and "2"
for D3Hot
What: /sys/kernel/debug/habanalabs/hl<n>/userptr
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Displays a list with information about the currently user
pointers (user virtual addresses) that are pinned and mapped
to DMA addresses
......@@ -172,13 +172,21 @@ Description: Displays a list with information about the currently user
What: /sys/kernel/debug/habanalabs/hl<n>/vm
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Displays a list with information about all the active virtual
address mappings per ASID
What: /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date: Mar 2020
KernelVersion: 5.6
Contact: oded.gabbay@gmail.com
Contact: ogabbay@kernel.org
Description: Sets the stop-on_error option for the device engines. Value of
"0" is for disable, otherwise enable.
What: /sys/kernel/debug/habanalabs/hl<n>/dump_security_violations
Date: Jan 2021
KernelVersion: 5.12
Contact: ogabbay@kernel.org
Description: Dumps all security violations to dmesg. This will also ack
all security violations meanings those violations will not be
dumped next time user calls this API
# SPDX-License-Identifier: GPL-2.0-only
include $(src)/common/mmu/Makefile
habanalabs-y += $(HL_COMMON_MMU_FILES)
include $(src)/common/pci/Makefile
habanalabs-y += $(HL_COMMON_PCI_FILES)
HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
common/asid.o common/habanalabs_ioctl.o \
common/command_buffer.o common/hw_queue.o common/irq.o \
common/sysfs.o common/hwmon.o common/memory.o \
common/command_submission.o common/mmu.o common/mmu_v1.o \
common/firmware_if.o common/pci.o
common/command_submission.o common/firmware_if.o
......@@ -50,8 +50,10 @@ unsigned long hl_asid_alloc(struct hl_device *hdev)
void hl_asid_free(struct hl_device *hdev, unsigned long asid)
{
if (WARN((asid == 0 || asid >= hdev->asic_prop.max_asid),
"Invalid ASID %lu", asid))
if (asid == HL_KERNEL_ASID_ID || asid >= hdev->asic_prop.max_asid) {
dev_crit(hdev->dev, "Invalid ASID %lu", asid);
return;
}
clear_bit(asid, hdev->asid_bitmap);
}
......@@ -635,10 +635,12 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
cb_handle >>= PAGE_SHIFT;
cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle);
/* hl_cb_get should never fail here so use kernel WARN */
WARN(!cb, "Kernel CB handle invalid 0x%x\n", (u32) cb_handle);
if (!cb)
/* hl_cb_get should never fail here */
if (!cb) {
dev_crit(hdev->dev, "Kernel CB handle invalid 0x%x\n",
(u32) cb_handle);
goto destroy_cb;
}
return cb;
......
......@@ -12,9 +12,14 @@
static void hl_ctx_fini(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
u64 idle_mask = 0;
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
int i;
/* Release all allocated pending cb's, those cb's were never
* scheduled so it is safe to release them here
*/
hl_pending_cb_list_flush(ctx);
/*
* If we arrived here, there are no jobs waiting for this context
* on its queues so we can safely remove it.
......@@ -50,12 +55,15 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
if ((!hdev->pldm) && (hdev->pdev) &&
(!hdev->asic_funcs->is_device_idle(hdev,
&idle_mask, NULL)))
idle_mask,
HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)))
dev_notice(hdev->dev,
"device not idle after user context is closed (0x%llx)\n",
idle_mask);
"device not idle after user context is closed (0x%llx, 0x%llx)\n",
idle_mask[0], idle_mask[1]);
} else {
dev_dbg(hdev->dev, "closing kernel context\n");
hdev->asic_funcs->ctx_fini(ctx);
hl_vm_ctx_fini(ctx);
hl_mmu_ctx_fini(ctx);
}
}
......@@ -140,8 +148,11 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
kref_init(&ctx->refcount);
ctx->cs_sequence = 1;
INIT_LIST_HEAD(&ctx->pending_cb_list);
spin_lock_init(&ctx->pending_cb_lock);
spin_lock_init(&ctx->cs_lock);
atomic_set(&ctx->thread_ctx_switch_token, 1);
atomic_set(&ctx->thread_pending_cb_token, 1);
ctx->thread_ctx_switch_wait_token = 0;
ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
sizeof(struct hl_fence *),
......@@ -151,11 +162,18 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
if (is_kernel_ctx) {
ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
rc = hl_mmu_ctx_init(ctx);
rc = hl_vm_ctx_init(ctx);
if (rc) {
dev_err(hdev->dev, "Failed to init mmu ctx module\n");
dev_err(hdev->dev, "Failed to init mem ctx module\n");
rc = -ENOMEM;
goto err_free_cs_pending;
}
rc = hdev->asic_funcs->ctx_init(ctx);
if (rc) {
dev_err(hdev->dev, "ctx_init failed\n");
goto err_vm_ctx_fini;
}
} else {
ctx->asid = hl_asid_alloc(hdev);
if (!ctx->asid) {
......@@ -194,6 +212,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
err_vm_ctx_fini:
hl_vm_ctx_fini(ctx);
err_asid_free:
if (ctx->asid != HL_KERNEL_ASID_ID)
hl_asid_free(hdev, ctx->asid);
err_free_cs_pending:
kfree(ctx->cs_pending);
......
......@@ -310,8 +310,8 @@ static int mmu_show(struct seq_file *s, void *data)
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
struct hl_ctx *ctx;
struct hl_mmu_hop_info hops_info;
u64 virt_addr = dev_entry->mmu_addr;
struct hl_mmu_hop_info hops_info = {0};
u64 virt_addr = dev_entry->mmu_addr, phys_addr;
int i;
if (!hdev->mmu_enable)
......@@ -333,8 +333,19 @@ static int mmu_show(struct seq_file *s, void *data)
return 0;
}
seq_printf(s, "asid: %u, virt_addr: 0x%llx\n",
dev_entry->mmu_asid, dev_entry->mmu_addr);
phys_addr = hops_info.hop_info[hops_info.used_hops - 1].hop_pte_val;
if (hops_info.scrambled_vaddr &&
(dev_entry->mmu_addr != hops_info.scrambled_vaddr))
seq_printf(s,
"asid: %u, virt_addr: 0x%llx, scrambled virt_addr: 0x%llx,\nphys_addr: 0x%llx, scrambled_phys_addr: 0x%llx\n",
dev_entry->mmu_asid, dev_entry->mmu_addr,
hops_info.scrambled_vaddr,
hops_info.unscrambled_paddr, phys_addr);
else
seq_printf(s,
"asid: %u, virt_addr: 0x%llx, phys_addr: 0x%llx\n",
dev_entry->mmu_asid, dev_entry->mmu_addr, phys_addr);
for (i = 0 ; i < hops_info.used_hops ; i++) {
seq_printf(s, "hop%d_addr: 0x%llx\n",
......@@ -403,7 +414,7 @@ static int engines_show(struct seq_file *s, void *data)
return 0;
}
hdev->asic_funcs->is_device_idle(hdev, NULL, s);
hdev->asic_funcs->is_device_idle(hdev, NULL, 0, s);
return 0;
}
......@@ -865,6 +876,17 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
return count;
}
static ssize_t hl_security_violations_read(struct file *f, char __user *buf,
size_t count, loff_t *ppos)
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
hdev->asic_funcs->ack_protection_bits_errors(hdev);
return 0;
}
static const struct file_operations hl_data32b_fops = {
.owner = THIS_MODULE,
.read = hl_data_read32,
......@@ -922,6 +944,11 @@ static const struct file_operations hl_stop_on_err_fops = {
.write = hl_stop_on_err_write
};
static const struct file_operations hl_security_violations_fops = {
.owner = THIS_MODULE,
.read = hl_security_violations_read
};
static const struct hl_info_list hl_debugfs_list[] = {
{"command_buffers", command_buffers_show, NULL},
{"command_submission", command_submission_show, NULL},
......@@ -1071,6 +1098,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry,
&hl_stop_on_err_fops);
debugfs_create_file("dump_security_violations",
0644,
dev_entry->root,
dev_entry,
&hl_security_violations_fops);
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
ent = debugfs_create_file(hl_debugfs_list[i].name,
......
......@@ -142,6 +142,9 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
case HL_MMAP_TYPE_CB:
return hl_cb_mmap(hpriv, vma);
case HL_MMAP_TYPE_BLOCK:
return hl_hw_block_mmap(hpriv, vma);
}
return -EINVAL;
......@@ -373,7 +376,6 @@ static int device_early_init(struct hl_device *hdev)
mutex_init(&hdev->send_cpu_message_lock);
mutex_init(&hdev->debug_lock);
mutex_init(&hdev->mmu_cache_lock);
INIT_LIST_HEAD(&hdev->cs_mirror_list);
spin_lock_init(&hdev->cs_mirror_lock);
INIT_LIST_HEAD(&hdev->fpriv_list);
......@@ -414,7 +416,6 @@ static void device_early_fini(struct hl_device *hdev)
{
int i;
mutex_destroy(&hdev->mmu_cache_lock);
mutex_destroy(&hdev->debug_lock);
mutex_destroy(&hdev->send_cpu_message_lock);
......@@ -1314,11 +1315,16 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
hdev->compute_ctx = NULL;
hl_debugfs_add_device(hdev);
/* debugfs nodes are created in hl_ctx_init so it must be called after
* hl_debugfs_add_device.
*/
rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
if (rc) {
dev_err(hdev->dev, "failed to initialize kernel context\n");
kfree(hdev->kernel_ctx);
goto mmu_fini;
goto remove_device_from_debugfs;
}
rc = hl_cb_pool_init(hdev);
......@@ -1327,8 +1333,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
goto release_ctx;
}
hl_debugfs_add_device(hdev);
/*
* From this point, in case of an error, add char devices and create
* sysfs nodes as part of the error flow, to allow debugging.
......@@ -1417,6 +1421,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
if (hl_ctx_put(hdev->kernel_ctx) != 1)
dev_err(hdev->dev,
"kernel ctx is still alive on initialization failure\n");
remove_device_from_debugfs:
hl_debugfs_remove_device(hdev);
mmu_fini:
hl_mmu_fini(hdev);
eq_fini:
......@@ -1482,7 +1488,8 @@ void hl_device_fini(struct hl_device *hdev)
usleep_range(50, 200);
rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
if (ktime_compare(ktime_get(), timeout) > 0) {
WARN(1, "Failed to remove device because reset function did not finish\n");
dev_crit(hdev->dev,
"Failed to remove device because reset function did not finish\n");
return;
}
}
......@@ -1515,8 +1522,6 @@ void hl_device_fini(struct hl_device *hdev)
device_late_fini(hdev);
hl_debugfs_remove_device(hdev);
/*
* Halt the engines and disable interrupts so we won't get any more
* completions from H/W and we won't have any accesses from the
......@@ -1548,6 +1553,8 @@ void hl_device_fini(struct hl_device *hdev)
if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
dev_err(hdev->dev, "kernel ctx is still alive\n");
hl_debugfs_remove_device(hdev);
hl_vm_fini(hdev);
hl_mmu_fini(hdev);
......
......@@ -279,8 +279,74 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)
return rc;
}
int hl_fw_cpucp_info_get(struct hl_device *hdev,
static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
u32 cpu_security_boot_status_reg)
{
u32 err_val, security_val;
/* Some of the firmware status codes are deprecated in newer f/w
* versions. In those versions, the errors are reported
* in different registers. Therefore, we need to check those
* registers and print the exact errors. Moreover, there
* may be multiple errors, so we need to report on each error
* separately. Some of the error codes might indicate a state
* that is not an error per-se, but it is an error in production
* environment
*/
err_val = RREG32(boot_err0_reg);
if (!(err_val & CPU_BOOT_ERR0_ENABLED))
return 0;
if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
dev_err(hdev->dev,
"Device boot error - DRAM initialization failed\n");
if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
dev_err(hdev->dev,
"Device boot error - Thermal Sensor initialization failed\n");
if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
dev_warn(hdev->dev,
"Device boot warning - Skipped DRAM initialization\n");
if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
if (hdev->bmc_enable)
dev_warn(hdev->dev,
"Device boot error - Skipped waiting for BMC\n");
else
err_val &= ~CPU_BOOT_ERR0_BMC_WAIT_SKIPPED;
}
if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
dev_err(hdev->dev,
"Device boot error - Serdes data from BMC not available\n");
if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
dev_err(hdev->dev,
"Device boot error - NIC F/W initialization failed\n");
if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
dev_warn(hdev->dev,
"Device boot warning - security not ready\n");
if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
dev_err(hdev->dev, "Device boot error - security failure\n");
if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
dev_err(hdev->dev, "Device boot error - eFuse failure\n");
if (err_val & CPU_BOOT_ERR0_PLL_FAIL)
dev_err(hdev->dev, "Device boot error - PLL failure\n");
security_val = RREG32(cpu_security_boot_status_reg);
if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
dev_dbg(hdev->dev, "Device security status %#x\n",
security_val);
if (err_val & ~CPU_BOOT_ERR0_ENABLED)
return -EIO;
return 0;
}
int hl_fw_cpucp_info_get(struct hl_device *hdev,
u32 cpu_security_boot_status_reg,
u32 boot_err0_reg)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct cpucp_packet pkt = {};
......@@ -314,6 +380,12 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
goto out;
}
rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
if (rc) {
dev_err(hdev->dev, "Errors in device boot\n");
goto out;
}
memcpy(&prop->cpucp_info, cpucp_info_cpu_addr,
sizeof(prop->cpucp_info));
......@@ -483,58 +555,6 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
return rc;
}
static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
u32 cpu_security_boot_status_reg)
{
u32 err_val, security_val;
/* Some of the firmware status codes are deprecated in newer f/w
* versions. In those versions, the errors are reported
* in different registers. Therefore, we need to check those
* registers and print the exact errors. Moreover, there
* may be multiple errors, so we need to report on each error
* separately. Some of the error codes might indicate a state
* that is not an error per-se, but it is an error in production
* environment
*/
err_val = RREG32(boot_err0_reg);
if (!(err_val & CPU_BOOT_ERR0_ENABLED))
return;
if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
dev_err(hdev->dev,
"Device boot error - DRAM initialization failed\n");
if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
dev_err(hdev->dev,
"Device boot error - Thermal Sensor initialization failed\n");
if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
dev_warn(hdev->dev,
"Device boot warning - Skipped DRAM initialization\n");
if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED)
dev_warn(hdev->dev,
"Device boot error - Skipped waiting for BMC\n");
if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
dev_err(hdev->dev,
"Device boot error - Serdes data from BMC not available\n");
if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
dev_err(hdev->dev,
"Device boot error - NIC F/W initialization failed\n");
if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
dev_warn(hdev->dev,
"Device boot warning - security not ready\n");
if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
dev_err(hdev->dev, "Device boot error - security failure\n");
if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
dev_err(hdev->dev, "Device boot error - eFuse failure\n");
security_val = RREG32(cpu_security_boot_status_reg);
if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
dev_dbg(hdev->dev, "Device security status %#x\n",
security_val);
}
static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
{
/* Some of the status codes below are deprecated in newer f/w
......@@ -659,6 +679,9 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
prop->fw_security_disabled = true;
}
dev_dbg(hdev->dev, "Firmware preboot security status %#x\n",
security_status);
dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
prop->hard_reset_done_by_fw ? "enabled" : "disabled");
......@@ -753,6 +776,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
if (prop->fw_boot_cpu_security_map &
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
prop->hard_reset_done_by_fw = true;
dev_dbg(hdev->dev,
"Firmware boot CPU security status %#x\n",
prop->fw_boot_cpu_security_map);
}
dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
......@@ -826,6 +853,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
goto out;
}
rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
if (rc)
return rc;
/* Clear reset status since we need to read again from app */
prop->hard_reset_done_by_fw = false;
......@@ -837,6 +868,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
if (prop->fw_app_security_map &
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
prop->hard_reset_done_by_fw = true;
dev_dbg(hdev->dev,
"Firmware application CPU security status %#x\n",
prop->fw_app_security_map);
}
dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
......@@ -844,6 +879,8 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
dev_info(hdev->dev, "Successfully loaded firmware to device\n");
return 0;
out:
fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
......
This diff is collapsed.
......@@ -57,12 +57,23 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
hw_ip.device_id = hdev->asic_funcs->get_pci_id(hdev);
hw_ip.sram_base_address = prop->sram_user_base_address;
hw_ip.dram_base_address = prop->dram_user_base_address;
hw_ip.dram_base_address =
hdev->mmu_enable && prop->dram_supports_virtual_memory ?
prop->dmmu.start_addr : prop->dram_user_base_address;
hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask;
hw_ip.sram_size = prop->sram_size - sram_kmd_size;
if (hdev->mmu_enable)
hw_ip.dram_size =
DIV_ROUND_DOWN_ULL(prop->dram_size - dram_kmd_size,
prop->dram_page_size) *
prop->dram_page_size;
else
hw_ip.dram_size = prop->dram_size - dram_kmd_size;
if (hw_ip.dram_size > PAGE_SIZE)
hw_ip.dram_enabled = 1;
hw_ip.dram_page_size = prop->dram_page_size;
hw_ip.num_of_events = prop->num_of_events;
memcpy(hw_ip.cpucp_version, prop->cpucp_info.cpucp_version,
......@@ -79,6 +90,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od;
hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor;
hw_ip.first_available_interrupt_id =
prop->first_available_user_msix_interrupt;
return copy_to_user(out, &hw_ip,
min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
}
......@@ -132,9 +145,10 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
return -EINVAL;
hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
&hw_idle.busy_engines_mask_ext, NULL);
hw_idle.busy_engines_mask_ext,
HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
hw_idle.busy_engines_mask =
lower_32_bits(hw_idle.busy_engines_mask_ext);
lower_32_bits(hw_idle.busy_engines_mask_ext[0]);
return copy_to_user(out, &hw_idle,
min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
......
......@@ -38,7 +38,7 @@ static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
return (abs(delta) - queue_len);
}
void hl_int_hw_queue_update_ci(struct hl_cs *cs)
void hl_hw_queue_update_ci(struct hl_cs *cs)
{
struct hl_device *hdev = cs->ctx->hdev;
struct hl_hw_queue *q;
......@@ -53,8 +53,13 @@ void hl_int_hw_queue_update_ci(struct hl_cs *cs)
if (!hdev->asic_prop.max_queues || q->queue_type == QUEUE_TYPE_HW)
return;
/* We must increment CI for every queue that will never get a
* completion, there are 2 scenarios this can happen:
* 1. All queues of a non completion CS will never get a completion.
* 2. Internal queues never gets completion.
*/
for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
if (q->queue_type == QUEUE_TYPE_INT)
if (!cs_needs_completion(cs) || q->queue_type == QUEUE_TYPE_INT)
atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
}
}
......@@ -292,6 +297,10 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
len = job->job_cb_size;
ptr = cb->bus_address;
/* Skip completion flow in case this is a non completion CS */
if (!cs_needs_completion(job->cs))
goto submit_bd;
cq_pkt.data = cpu_to_le32(
((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
& CQ_ENTRY_SHADOW_INDEX_MASK) |
......@@ -318,6 +327,7 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
cq->pi = hl_cq_inc_ptr(cq->pi);
submit_bd:
ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}
......@@ -525,6 +535,7 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
struct hl_cs_job *job, *tmp;
struct hl_hw_queue *q;
int rc = 0, i, cq_cnt;
bool first_entry;
u32 max_queues;
cntr = &hdev->aggregated_cs_counters;
......@@ -548,7 +559,9 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
switch (q->queue_type) {
case QUEUE_TYPE_EXT:
rc = ext_queue_sanity_checks(hdev, q,
cs->jobs_in_queue_cnt[i], true);
cs->jobs_in_queue_cnt[i],
cs_needs_completion(cs) ?
true : false);
break;
case QUEUE_TYPE_INT:
rc = int_queue_sanity_checks(hdev, q,
......@@ -583,12 +596,38 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
hdev->asic_funcs->collective_wait_init_cs(cs);
spin_lock(&hdev->cs_mirror_lock);
/* Verify staged CS exists and add to the staged list */
if (cs->staged_cs && !cs->staged_first) {
struct hl_cs *staged_cs;
staged_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
if (!staged_cs) {
dev_err(hdev->dev,
"Cannot find staged submission sequence %llu",
cs->staged_sequence);
rc = -EINVAL;
goto unlock_cs_mirror;
}
if (is_staged_cs_last_exists(hdev, staged_cs)) {
dev_err(hdev->dev,
"Staged submission sequence %llu already submitted",
cs->staged_sequence);
rc = -EINVAL;
goto unlock_cs_mirror;
}
list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node);
}
list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);
/* Queue TDR if the CS is the first entry and if timeout is wanted */
first_entry = list_first_entry(&hdev->cs_mirror_list,
struct hl_cs, mirror_node) == cs;
if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
(list_first_entry(&hdev->cs_mirror_list,
struct hl_cs, mirror_node) == cs)) {
first_entry && cs_needs_timeout(cs)) {
cs->tdr_active = true;
schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
......@@ -623,6 +662,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
goto out;
unlock_cs_mirror:
spin_unlock(&hdev->cs_mirror_lock);
unroll_cq_resv:
q = &hdev->kernel_queues[0];
for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
......
This diff is collapsed.
# SPDX-License-Identifier: GPL-2.0-only
HL_COMMON_MMU_FILES := common/mmu/mmu.o common/mmu/mmu_v1.o
......@@ -7,7 +7,7 @@
#include <linux/slab.h>
#include "habanalabs.h"
#include "../habanalabs.h"
bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr)
{
......@@ -166,13 +166,21 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
mmu_prop = &prop->pmmu;
pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
/*
* The H/W handles mapping of specific page sizes. Hence if the page
* size is bigger, we break it to sub-pages and unmap them separately.
*/
if ((page_size % mmu_prop->page_size) == 0) {
real_page_size = mmu_prop->page_size;
} else {
/*
* MMU page size may differ from DRAM page size.
* In such case work with the DRAM page size and let the MMU
* scrambling routine to handle this mismatch when
* calculating the address to remove from the MMU page table
*/
if (is_dram_addr && ((page_size % prop->dram_page_size) == 0)) {
real_page_size = prop->dram_page_size;
} else {
dev_err(hdev->dev,
"page size of %u is not %uKB aligned, can't unmap\n",
......@@ -180,6 +188,7 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
return -EFAULT;
}
}
npages = page_size / real_page_size;
real_virt_addr = virt_addr;
......@@ -253,6 +262,17 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
*/
if ((page_size % mmu_prop->page_size) == 0) {
real_page_size = mmu_prop->page_size;
} else if (is_dram_addr && ((page_size % prop->dram_page_size) == 0) &&
(prop->dram_page_size < mmu_prop->page_size)) {
/*
* MMU page size may differ from DRAM page size.
* In such case work with the DRAM page size and let the MMU
* scrambling routine handle this mismatch when calculating
* the address to place in the MMU page table. (in that case
* also make sure that the dram_page_size smaller than the
* mmu page size)
*/
real_page_size = prop->dram_page_size;
} else {
dev_err(hdev->dev,
"page size of %u is not %uKB aligned, can't map\n",
......@@ -261,9 +281,21 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
return -EFAULT;
}
WARN_ONCE((phys_addr & (real_page_size - 1)),
"Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
phys_addr, real_page_size);
/*
* Verify that the phys and virt addresses are aligned with the
* MMU page size (in dram this means checking the address and MMU
* after scrambling)
*/
if ((is_dram_addr &&
((hdev->asic_funcs->scramble_addr(hdev, phys_addr) &
(mmu_prop->page_size - 1)) ||
(hdev->asic_funcs->scramble_addr(hdev, virt_addr) &
(mmu_prop->page_size - 1)))) ||
(!is_dram_addr && ((phys_addr & (real_page_size - 1)) ||
(virt_addr & (real_page_size - 1)))))
dev_crit(hdev->dev,
"Mapping address 0x%llx with virtual address 0x%llx and page size of 0x%x is erroneous! Addresses must be divisible by page size",
phys_addr, virt_addr, real_page_size);
npages = page_size / real_page_size;
real_virt_addr = virt_addr;
......@@ -444,19 +476,53 @@ void hl_mmu_swap_in(struct hl_ctx *ctx)
hdev->mmu_func[MMU_HR_PGT].swap_in(ctx);
}
static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
struct hl_mmu_hop_info *hops,
u64 *phys_addr)
{
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 offset_mask, addr_mask, hop_shift, tmp_phys_addr;
u32 hop0_shift_off;
void *p;
/* last hop holds the phys address and flags */
if (hops->unscrambled_paddr)
tmp_phys_addr = hops->unscrambled_paddr;
else
tmp_phys_addr = hops->hop_info[hops->used_hops - 1].hop_pte_val;
if (hops->range_type == HL_VA_RANGE_TYPE_HOST_HUGE)
p = &prop->pmmu_huge;
else if (hops->range_type == HL_VA_RANGE_TYPE_HOST)
p = &prop->pmmu;
else /* HL_VA_RANGE_TYPE_DRAM */
p = &prop->dmmu;
/*
* find the correct hop shift field in hl_mmu_properties structure
* in order to determine the right maks for the page offset.
*/
hop0_shift_off = offsetof(struct hl_mmu_properties, hop0_shift);
p = (char *)p + hop0_shift_off;
p = (char *)p + ((hops->used_hops - 1) * sizeof(u64));
hop_shift = *(u64 *)p;
offset_mask = (1 << hop_shift) - 1;
addr_mask = ~(offset_mask);
*phys_addr = (tmp_phys_addr & addr_mask) |
(virt_addr & offset_mask);
}
int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr)
{
struct hl_mmu_hop_info hops;
u64 tmp_addr;
int rc;
rc = hl_mmu_get_tlb_info(ctx, virt_addr, &hops);
if (rc)
return rc;
/* last hop holds the phys address and flags */
tmp_addr = hops.hop_info[hops.used_hops - 1].hop_pte_val;
*phys_addr = (tmp_addr & HOP_PHYS_ADDR_MASK) | (virt_addr & FLAGS_MASK);
hl_mmu_pa_page_with_offset(ctx, virt_addr, &hops, phys_addr);
return 0;
}
......@@ -473,6 +539,8 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
if (!hdev->mmu_enable)
return -EOPNOTSUPP;
hops->scrambled_vaddr = virt_addr; /* assume no scrambling */
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->dmmu.start_addr,
prop->dmmu.end_addr);
......@@ -491,6 +559,11 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
mutex_unlock(&ctx->mmu_lock);
/* add page offset to physical address */
if (hops->unscrambled_paddr)
hl_mmu_pa_page_with_offset(ctx, virt_addr, hops,
&hops->unscrambled_paddr);
return rc;
}
......@@ -512,3 +585,28 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
return 0;
}
/**
* hl_mmu_scramble_addr() - The generic mmu address scrambling routine.
* @hdev: pointer to device data.
* @addr: The address to scramble.
*
* Return: The scrambled address.
*/
u64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr)
{
return addr;
}
/**
* hl_mmu_descramble_addr() - The generic mmu address descrambling
* routine.
* @hdev: pointer to device data.
* @addr: The address to descramble.
*
* Return: The un-scrambled address.
*/
u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr)
{
return addr;
}
......@@ -5,8 +5,8 @@
* All Rights Reserved.
*/
#include "habanalabs.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../habanalabs.h"
#include "../../include/hw_ip/mmu/mmu_general.h"
#include <linux/slab.h>
......
# SPDX-License-Identifier: GPL-2.0-only
HL_COMMON_PCI_FILES := common/pci/pci.o
......@@ -5,8 +5,8 @@
* All Rights Reserved.
*/
#include "habanalabs.h"
#include "../include/hw_ip/pci/pci_general.h"
#include "../habanalabs.h"
#include "../../include/hw_ip/pci/pci_general.h"
#include <linux/pci.h>
......@@ -307,40 +307,6 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
return rc;
}
/**
* hl_pci_set_dma_mask() - Set DMA masks for the device.
* @hdev: Pointer to hl_device structure.
*
* This function sets the DMA masks (regular and consistent) for a specified
* value. If it doesn't succeed, it tries to set it to a fall-back value
*
* Return: 0 on success, non-zero for failure.
*/
static int hl_pci_set_dma_mask(struct hl_device *hdev)
{
struct pci_dev *pdev = hdev->pdev;
int rc;
/* set DMA mask */
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
if (rc) {
dev_err(hdev->dev,
"Failed to set pci dma mask to %d bits, error %d\n",
hdev->dma_mask, rc);
return rc;
}
rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
if (rc) {
dev_err(hdev->dev,
"Failed to set pci consistent dma mask to %d bits, error %d\n",
hdev->dma_mask, rc);
return rc;
}
return 0;
}
/**
* hl_pci_init() - PCI initialization code.
* @hdev: Pointer to hl_device structure.
......@@ -377,9 +343,14 @@ int hl_pci_init(struct hl_device *hdev)
goto unmap_pci_bars;
}
rc = hl_pci_set_dma_mask(hdev);
if (rc)
rc = dma_set_mask_and_coherent(&pdev->dev,
DMA_BIT_MASK(hdev->dma_mask));
if (rc) {
dev_err(hdev->dev,
"Failed to set dma mask to %d bits, error %d\n",
hdev->dma_mask, rc);
goto unmap_pci_bars;
}
return 0;
......
This diff is collapsed.
......@@ -251,11 +251,13 @@ enum gaudi_nic_mask {
* @hdev: habanalabs device structure.
* @kref: refcount of this SOB group. group will reset once refcount is zero.
* @base_sob_id: base sob id of this SOB group.
* @queue_id: id of the queue that waits on this sob group
*/
struct gaudi_hw_sob_group {
struct hl_device *hdev;
struct kref kref;
u32 base_sob_id;
u32 queue_id;
};
#define NUM_SOB_GROUPS (HL_RSVD_SOBS * QMAN_STREAMS)
......@@ -333,6 +335,7 @@ struct gaudi_device {
};
void gaudi_init_security(struct hl_device *hdev);
void gaudi_ack_protection_bits_errors(struct hl_device *hdev);
void gaudi_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
......
......@@ -634,9 +634,21 @@ static int gaudi_config_etr(struct hl_device *hdev,
WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
WREG32(mmPSOC_ETR_MODE, input->sink_mode);
/* Workaround for H3 #HW-2075 bug: use small data chunks */
WREG32(mmPSOC_ETR_AXICTL, (is_host ? 0 : 0x700) |
PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT);
if (hdev->asic_prop.fw_security_disabled) {
/* make ETR not privileged */
val = FIELD_PREP(
PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0);
/* make ETR non-secured (inverted logic) */
val |= FIELD_PREP(
PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK, 1);
/*
* Workaround for H3 #HW-2075 bug: use small data
* chunks
*/
val |= FIELD_PREP(PSOC_ETR_AXICTL_WRBURSTLEN_MASK,
is_host ? 0 : 7);
WREG32(mmPSOC_ETR_AXICTL, val);
}
WREG32(mmPSOC_ETR_DBALO,
lower_32_bits(input->buffer_address));
WREG32(mmPSOC_ETR_DBAHI,
......
......@@ -13052,3 +13052,8 @@ void gaudi_init_security(struct hl_device *hdev)
gaudi_init_protection_bits(hdev);
}
void gaudi_ack_protection_bits_errors(struct hl_device *hdev)
{
}
......@@ -455,6 +455,8 @@ int goya_get_fixed_properties(struct hl_device *hdev)
prop->max_pending_cs = GOYA_MAX_PENDING_CS;
prop->first_available_user_msix_interrupt = USHRT_MAX;
/* disable fw security for now, set it in a later stage */
prop->fw_security_disabled = true;
prop->fw_security_status_valid = false;
......@@ -2914,7 +2916,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
else
timeout = HL_DEVICE_TIMEOUT_USEC;
if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
dev_err_ratelimited(hdev->dev,
"Can't send driver job on QMAN0 because the device is not idle\n");
return -EBUSY;
......@@ -3876,10 +3878,10 @@ static int goya_parse_cb_mmu(struct hl_device *hdev,
patched_cb_handle >>= PAGE_SHIFT;
parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
(u32) patched_cb_handle);
/* hl_cb_get should never fail here so use kernel WARN */
WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
(u32) patched_cb_handle);
/* hl_cb_get should never fail here */
if (!parser->patched_cb) {
dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
(u32) patched_cb_handle);
rc = -EFAULT;
goto out;
}
......@@ -3948,10 +3950,10 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
patched_cb_handle >>= PAGE_SHIFT;
parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
(u32) patched_cb_handle);
/* hl_cb_get should never fail here so use kernel WARN */
WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
(u32) patched_cb_handle);
/* hl_cb_get should never fail here */
if (!parser->patched_cb) {
dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
(u32) patched_cb_handle);
rc = -EFAULT;
goto out;
}
......@@ -4122,9 +4124,6 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
if (ddr_bar_addr == U64_MAX)
rc = -EIO;
} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
} else {
rc = -EFAULT;
}
......@@ -4178,9 +4177,6 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
if (ddr_bar_addr == U64_MAX)
rc = -EIO;
} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
} else {
rc = -EFAULT;
}
......@@ -4223,9 +4219,6 @@ static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
if (ddr_bar_addr == U64_MAX)
rc = -EIO;
} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
} else {
rc = -EFAULT;
}
......@@ -4266,9 +4259,6 @@ static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
if (ddr_bar_addr == U64_MAX)
rc = -EIO;
} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
} else {
rc = -EFAULT;
}
......@@ -4877,8 +4867,6 @@ int goya_context_switch(struct hl_device *hdev, u32 asid)
WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
goya_mmu_prepare(hdev, asid);
goya_clear_sm_regs(hdev);
return 0;
......@@ -5044,7 +5032,7 @@ static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
return;
if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
WARN(1, "asid %u is too big\n", asid);
dev_crit(hdev->dev, "asid %u is too big\n", asid);
return;
}
......@@ -5073,8 +5061,6 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
else
timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
mutex_lock(&hdev->mmu_cache_lock);
/* L0 & L1 invalidation */
WREG32(mmSTLB_INV_ALL_START, 1);
......@@ -5086,8 +5072,6 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
1000,
timeout_usec);
mutex_unlock(&hdev->mmu_cache_lock);
if (rc) {
dev_err_ratelimited(hdev->dev,
"MMU cache invalidation timeout\n");
......@@ -5117,8 +5101,6 @@ static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
else
timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
mutex_lock(&hdev->mmu_cache_lock);
/*
* TODO: currently invalidate entire L0 & L1 as in regular hard
* invalidation. Need to apply invalidation of specific cache lines with
......@@ -5141,8 +5123,6 @@ static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
1000,
timeout_usec);
mutex_unlock(&hdev->mmu_cache_lock);
if (rc) {
dev_err_ratelimited(hdev->dev,
"MMU cache invalidation timeout\n");
......@@ -5172,7 +5152,7 @@ int goya_cpucp_info_get(struct hl_device *hdev)
if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
return 0;
rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0);
rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
if (rc)
return rc;
......@@ -5207,11 +5187,12 @@ static void goya_disable_clock_gating(struct hl_device *hdev)
/* clock gating not supported in Goya */
}
static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
struct seq_file *s)
static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
u8 mask_len, struct seq_file *s)
{
const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
unsigned long *mask = (unsigned long *)mask_arr;
u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
mme_arch_sts;
bool is_idle = true, is_eng_idle;
......@@ -5231,9 +5212,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
IS_DMA_IDLE(dma_core_sts0);
is_idle &= is_eng_idle;
if (mask)
*mask |= ((u64) !is_eng_idle) <<
(GOYA_ENGINE_ID_DMA_0 + i);
if (mask && !is_eng_idle)
set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask);
if (s)
seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
qm_glbl_sts0, dma_core_sts0);
......@@ -5255,9 +5235,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
IS_TPC_IDLE(tpc_cfg_sts);
is_idle &= is_eng_idle;
if (mask)
*mask |= ((u64) !is_eng_idle) <<
(GOYA_ENGINE_ID_TPC_0 + i);
if (mask && !is_eng_idle)
set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask);
if (s)
seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
......@@ -5276,8 +5255,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
IS_MME_IDLE(mme_arch_sts);
is_idle &= is_eng_idle;
if (mask)
*mask |= ((u64) !is_eng_idle) << GOYA_ENGINE_ID_MME_0;
if (mask && !is_eng_idle)
set_bit(GOYA_ENGINE_ID_MME_0, mask);
if (s) {
seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
cmdq_glbl_sts0, mme_arch_sts);
......@@ -5321,6 +5300,9 @@ static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
static int goya_ctx_init(struct hl_ctx *ctx)
{
if (ctx->asid != HL_KERNEL_ASID_ID)
goya_mmu_prepare(ctx->hdev, ctx->asid);
return 0;
}
......@@ -5399,6 +5381,18 @@ static void goya_ctx_fini(struct hl_ctx *ctx)
}
static int goya_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
u32 *block_id)
{
return -EPERM;
}
static int goya_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
u32 block_id, u32 block_size)
{
return -EPERM;
}
static const struct hl_asic_funcs goya_funcs = {
.early_init = goya_early_init,
.early_fini = goya_early_fini,
......@@ -5475,7 +5469,12 @@ static const struct hl_asic_funcs goya_funcs = {
.set_dma_mask_from_fw = goya_set_dma_mask_from_fw,
.get_device_time = goya_get_device_time,
.collective_wait_init_cs = goya_collective_wait_init_cs,
.collective_wait_create_jobs = goya_collective_wait_create_jobs
.collective_wait_create_jobs = goya_collective_wait_create_jobs,
.scramble_addr = hl_mmu_scramble_addr,
.descramble_addr = hl_mmu_descramble_addr,
.ack_protection_bits_errors = goya_ack_protection_bits_errors,
.get_hw_block_id = goya_get_hw_block_id,
.hw_block_mmap = goya_block_mmap
};
/*
......
......@@ -173,6 +173,7 @@ void goya_init_mme_qmans(struct hl_device *hdev);
void goya_init_tpc_qmans(struct hl_device *hdev);
int goya_init_cpu_queues(struct hl_device *hdev);
void goya_init_security(struct hl_device *hdev);
void goya_ack_protection_bits_errors(struct hl_device *hdev);
int goya_late_init(struct hl_device *hdev);
void goya_late_fini(struct hl_device *hdev);
......
......@@ -434,8 +434,15 @@ static int goya_config_etr(struct hl_device *hdev,
WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
WREG32(mmPSOC_ETR_MODE, input->sink_mode);
WREG32(mmPSOC_ETR_AXICTL,
0x700 | PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT);
if (hdev->asic_prop.fw_security_disabled) {
/* make ETR not privileged */
val = FIELD_PREP(PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0);
/* make ETR non-secured (inverted logic) */
val |= FIELD_PREP(PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK, 1);
/* burst size 8 */
val |= FIELD_PREP(PSOC_ETR_AXICTL_WRBURSTLEN_MASK, 7);
WREG32(mmPSOC_ETR_AXICTL, val);
}
WREG32(mmPSOC_ETR_DBALO,
lower_32_bits(input->buffer_address));
WREG32(mmPSOC_ETR_DBAHI,
......
......@@ -3120,3 +3120,8 @@ void goya_init_security(struct hl_device *hdev)
goya_init_protection_bits(hdev);
}
void goya_ack_protection_bits_errors(struct hl_device *hdev)
{
}
......@@ -58,11 +58,25 @@ struct hl_eq_ecc_data {
__u8 pad[7];
};
enum hl_sm_sei_cause {
SM_SEI_SO_OVERFLOW,
SM_SEI_LBW_4B_UNALIGNED,
SM_SEI_AXI_RESPONSE_ERR
};
struct hl_eq_sm_sei_data {
__le32 sei_log;
/* enum hl_sm_sei_cause */
__u8 sei_cause;
__u8 pad[3];
};
struct hl_eq_entry {
struct hl_eq_header hdr;
union {
struct hl_eq_ecc_data ecc_data;
struct hl_eq_hbm_ecc_data hbm_ecc_data;
struct hl_eq_sm_sei_data sm_sei_data;
__le64 data[7];
};
};
......
......@@ -70,6 +70,9 @@
* checksum. Trying to program image again
* might solve this.
*
* CPU_BOOT_ERR0_PLL_FAIL PLL settings failed, meaning that one
* of the PLLs remains in REF_CLK
*
* CPU_BOOT_ERR0_ENABLED Error registers enabled.
* This is a main indication that the
* running FW populates the error
......@@ -88,6 +91,7 @@
#define CPU_BOOT_ERR0_EFUSE_FAIL (1 << 9)
#define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL (1 << 10)
#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << 11)
#define CPU_BOOT_ERR0_PLL_FAIL (1 << 12)
#define CPU_BOOT_ERR0_ENABLED (1 << 31)
/*
......@@ -150,10 +154,18 @@
* CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled.
* Initialized in: linux
*
* CPU_BOOT_DEV_STS0_SP_SRAM_EN SP SRAM is initialized and available
* for use.
* Initialized in: preboot
*
* CPU_BOOT_DEV_STS0_CLK_GATE_EN Clock Gating enabled.
* FW initialized Clock Gating.
* Initialized in: preboot
*
* CPU_BOOT_DEV_STS0_HBM_ECC_EN HBM ECC handling Enabled.
* FW handles HBM ECC indications.
* Initialized in: linux
*
* CPU_BOOT_DEV_STS0_ENABLED Device status register enabled.
* This is a main indication that the
* running FW populates the device status
......@@ -175,7 +187,9 @@
#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << 9)
#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << 10)
#define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << 11)
#define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << 12)
#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << 13)
#define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << 14)
#define CPU_BOOT_DEV_STS0_ENABLED (1 << 31)
enum cpu_boot_status {
......
......@@ -212,6 +212,10 @@ enum gaudi_async_event_id {
GAUDI_EVENT_NIC_SEI_2 = 266,
GAUDI_EVENT_NIC_SEI_3 = 267,
GAUDI_EVENT_NIC_SEI_4 = 268,
GAUDI_EVENT_DMA_IF_SEI_0 = 277,
GAUDI_EVENT_DMA_IF_SEI_1 = 278,
GAUDI_EVENT_DMA_IF_SEI_2 = 279,
GAUDI_EVENT_DMA_IF_SEI_3 = 280,
GAUDI_EVENT_PCIE_FLR = 290,
GAUDI_EVENT_TPC0_BMON_SPMU = 300,
GAUDI_EVENT_TPC0_KRN_ERR = 301,
......
......@@ -389,6 +389,9 @@ enum axi_id {
#define RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5 RAZWI_INITIATOR_ID_X_Y(8, 6)
#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT 1
#define PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK 0x1
#define PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK 0x2
#define PSOC_ETR_AXICTL_WRBURSTLEN_MASK 0xF00
/* STLB_CACHE_INV */
#define STLB_CACHE_INV_PRODUCER_INDEX_SHIFT 0
......
......@@ -78,6 +78,9 @@ struct packet_wreg_bulk {
__le64 values[0]; /* data starts here */
};
#define GAUDI_PKT_LONG_CTL_OP_SHIFT 20
#define GAUDI_PKT_LONG_CTL_OP_MASK 0x00300000
struct packet_msg_long {
__le32 value;
__le32 ctl;
......@@ -111,18 +114,6 @@ struct packet_msg_long {
#define GAUDI_PKT_SHORT_CTL_BASE_SHIFT 22
#define GAUDI_PKT_SHORT_CTL_BASE_MASK 0x00C00000
#define GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT 24
#define GAUDI_PKT_SHORT_CTL_OPCODE_MASK 0x1F000000
#define GAUDI_PKT_SHORT_CTL_EB_SHIFT 29
#define GAUDI_PKT_SHORT_CTL_EB_MASK 0x20000000
#define GAUDI_PKT_SHORT_CTL_RB_SHIFT 30
#define GAUDI_PKT_SHORT_CTL_RB_MASK 0x40000000
#define GAUDI_PKT_SHORT_CTL_MB_SHIFT 31
#define GAUDI_PKT_SHORT_CTL_MB_MASK 0x80000000
struct packet_msg_short {
__le32 value;
__le32 ctl;
......@@ -146,18 +137,6 @@ struct packet_msg_prot {
#define GAUDI_PKT_FENCE_CTL_PRED_SHIFT 0
#define GAUDI_PKT_FENCE_CTL_PRED_MASK 0x0000001F
#define GAUDI_PKT_FENCE_CTL_OPCODE_SHIFT 24
#define GAUDI_PKT_FENCE_CTL_OPCODE_MASK 0x1F000000
#define GAUDI_PKT_FENCE_CTL_EB_SHIFT 29
#define GAUDI_PKT_FENCE_CTL_EB_MASK 0x20000000
#define GAUDI_PKT_FENCE_CTL_RB_SHIFT 30
#define GAUDI_PKT_FENCE_CTL_RB_MASK 0x40000000
#define GAUDI_PKT_FENCE_CTL_MB_SHIFT 31
#define GAUDI_PKT_FENCE_CTL_MB_MASK 0x80000000
struct packet_fence {
__le32 cfg;
__le32 ctl;
......
......@@ -260,5 +260,8 @@
#define DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT
#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT 1
#define PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK 0x1
#define PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK 0x2
#define PSOC_ETR_AXICTL_WRBURSTLEN_MASK 0xF00
#endif /* ASIC_REG_GOYA_MASKS_H_ */
......@@ -309,7 +309,9 @@ struct hl_info_hw_ip_info {
__u32 num_of_events;
__u32 device_id; /* PCI Device ID */
__u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */
__u32 reserved[2];
__u32 reserved;
__u16 first_available_interrupt_id;
__u16 reserved2;
__u32 cpld_version;
__u32 psoc_pci_pll_nr;
__u32 psoc_pci_pll_nf;
......@@ -320,6 +322,8 @@ struct hl_info_hw_ip_info {
__u8 pad[2];
__u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
__u64 reserved3;
__u64 dram_page_size;
};
struct hl_info_dram_usage {
......@@ -327,6 +331,8 @@ struct hl_info_dram_usage {
__u64 ctx_dram_mem;
};
#define HL_BUSY_ENGINES_MASK_EXT_SIZE 2
struct hl_info_hw_idle {
__u32 is_idle;
/*
......@@ -339,7 +345,7 @@ struct hl_info_hw_idle {
* Extended Bitmask of busy engines.
* Bits definition is according to `enum <chip>_enging_id'.
*/
__u64 busy_engines_mask_ext;
__u64 busy_engines_mask_ext[HL_BUSY_ENGINES_MASK_EXT_SIZE];
};
struct hl_info_device_status {
......@@ -609,6 +615,9 @@ struct hl_cs_chunk {
#define HL_CS_FLAGS_WAIT 0x4
#define HL_CS_FLAGS_COLLECTIVE_WAIT 0x8
#define HL_CS_FLAGS_TIMESTAMP 0x20
#define HL_CS_FLAGS_STAGED_SUBMISSION 0x40
#define HL_CS_FLAGS_STAGED_SUBMISSION_FIRST 0x80
#define HL_CS_FLAGS_STAGED_SUBMISSION_LAST 0x100
#define HL_CS_STATUS_SUCCESS 0
......@@ -622,11 +631,18 @@ struct hl_cs_in {
/* holds address of array of hl_cs_chunk for execution phase */
__u64 chunks_execute;
union {
/* this holds address of array of hl_cs_chunk for store phase -
* Currently not in use
*/
__u64 chunks_store;
/* Sequence number of a staged submission CS
* valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set
*/
__u64 seq;
};
/* Number of chunks in restore phase array. Maximum number is
* HL_MAX_JOBS_PER_CS
*/
......@@ -704,6 +720,8 @@ union hl_wait_cs_args {
#define HL_MEM_OP_MAP 2
/* Opcode to unmap previously mapped host and device memory */
#define HL_MEM_OP_UNMAP 3
/* Opcode to map a hw block */
#define HL_MEM_OP_MAP_BLOCK 4
/* Memory flags */
#define HL_MEM_CONTIGUOUS 0x1
......@@ -758,6 +776,17 @@ struct hl_mem_in {
__u64 mem_size;
} map_host;
/* HL_MEM_OP_MAP_BLOCK - map a hw block */
struct {
/*
* HW block address to map, a handle will be returned
* to the user and will be used to mmap the relevant
* block. Only addresses from configuration space are
* allowed.
*/
__u64 block_addr;
} map_block;
/* HL_MEM_OP_UNMAP - unmap host memory */
struct {
/* Virtual address returned from HL_MEM_OP_MAP */
......@@ -784,8 +813,9 @@ struct hl_mem_out {
__u64 device_virt_addr;
/*
* Used for HL_MEM_OP_ALLOC. This is the assigned
* handle for the allocated memory
* Used for HL_MEM_OP_ALLOC and HL_MEM_OP_MAP_BLOCK.
* This is the assigned handle for the allocated memory
* or mapped block
*/
__u64 handle;
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment