Commit 5ffc06eb authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'char-misc-5.15-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc

Pull habanalabs updates from Greg KH:
 "Here is another round of misc driver patches for 5.15-rc1.

  In here are only updates for the Habanalabs driver. This request is
  late because the previously-objected-to dma-buf patches are all
  removed and some fixes that you and others found are now included in
  here as well.

  All of these have been in linux-next for well over a week with no
  reports of problems, and they are all self-contained to only this one
  driver. Full details are in the shortlog"

* tag 'char-misc-5.15-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (61 commits)
  habanalabs/gaudi: hwmon default card name
  habanalabs: add support for f/w reset
  habanalabs/gaudi: block ICACHE_BASE_ADDERESS_HIGH in TPC
  habanalabs: cannot sleep while holding spinlock
  habanalabs: never copy_from_user inside spinlock
  habanalabs: remove unnecessary device status check
  habanalabs: disable IRQ in user interrupts spinlock
  habanalabs: add "in device creation" status
  habanalabs/gaudi: invalidate PMMU mem cache on init
  habanalabs/gaudi: size should be printed in decimal
  habanalabs/gaudi: define DC POWER for secured PMC
  habanalabs/gaudi: unmask out of bounds SLM access interrupt
  habanalabs: add userptr_lookup node in debugfs
  habanalabs/gaudi: fetch TPC/MME ECC errors from F/W
  habanalabs: modify multi-CS to wait on stream masters
  habanalabs/gaudi: add monitored SOBs to state dump
  habanalabs/gaudi: restore user registers when context opens
  habanalabs/gaudi: increase boot fit timeout
  habanalabs: update to latest firmware headers
  habanalabs/gaudi: minimize number of register reads
  ...
parents a668acb8 4cd67adc
...@@ -215,6 +215,17 @@ Description: Sets the skip reset on timeout option for the device. Value of ...@@ -215,6 +215,17 @@ Description: Sets the skip reset on timeout option for the device. Value of
"0" means device will be reset in case some CS has timed out, "0" means device will be reset in case some CS has timed out,
otherwise it will not be reset. otherwise it will not be reset.
What: /sys/kernel/debug/habanalabs/hl<n>/state_dump
Date: Oct 2021
KernelVersion: 5.15
Contact: ynudelman@habana.ai
Description: Gets the state dump occurring on a CS timeout or failure.
State dump is used for debug and is created each time in case of
a problem in a CS execution, before reset.
Reading from the node returns the newest state dump available.
Writing an integer X discards X state dumps, so that the
next read returns the (X+1)-th newest state dump.
What: /sys/kernel/debug/habanalabs/hl<n>/stop_on_err What: /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date: Mar 2020 Date: Mar 2020
KernelVersion: 5.6 KernelVersion: 5.6
...@@ -230,6 +241,14 @@ Description: Displays a list with information about the currently user ...@@ -230,6 +241,14 @@ Description: Displays a list with information about the currently user
pointers (user virtual addresses) that are pinned and mapped pointers (user virtual addresses) that are pinned and mapped
to DMA addresses to DMA addresses
What: /sys/kernel/debug/habanalabs/hl<n>/userptr_lookup
Date: Aug 2021
KernelVersion: 5.15
Contact: ogabbay@kernel.org
Description: Allows searching for specific user pointers (user virtual
addresses) that are pinned and mapped to DMA addresses, and seeing
their resolution to the specific DMA address.
What: /sys/kernel/debug/habanalabs/hl<n>/vm What: /sys/kernel/debug/habanalabs/hl<n>/vm
Date: Jan 2019 Date: Jan 2019
KernelVersion: 5.1 KernelVersion: 5.1
......
...@@ -10,4 +10,5 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \ ...@@ -10,4 +10,5 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
common/asid.o common/habanalabs_ioctl.o \ common/asid.o common/habanalabs_ioctl.o \
common/command_buffer.o common/hw_queue.o common/irq.o \ common/command_buffer.o common/hw_queue.o common/irq.o \
common/sysfs.o common/hwmon.o common/memory.o \ common/sysfs.o common/hwmon.o common/memory.o \
common/command_submission.o common/firmware_if.o common/command_submission.o common/firmware_if.o \
common/state_dump.o
...@@ -314,8 +314,6 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, ...@@ -314,8 +314,6 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
spin_lock(&mgr->cb_lock); spin_lock(&mgr->cb_lock);
rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC); rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
if (rc < 0)
rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_KERNEL);
spin_unlock(&mgr->cb_lock); spin_unlock(&mgr->cb_lock);
if (rc < 0) { if (rc < 0) {
...@@ -552,7 +550,7 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) ...@@ -552,7 +550,7 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
vma->vm_private_data = cb; vma->vm_private_data = cb;
rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address, rc = hdev->asic_funcs->mmap(hdev, vma, cb->kernel_address,
cb->bus_address, cb->size); cb->bus_address, cb->size);
if (rc) { if (rc) {
spin_lock(&cb->lock); spin_lock(&cb->lock);
......
...@@ -9,16 +9,70 @@ ...@@ -9,16 +9,70 @@
#include <linux/slab.h> #include <linux/slab.h>
/*
 * hl_encaps_handle_do_release - kref release callback of an encapsulated
 *                               signals handle
 *
 * @ref: the refcount member embedded in the handle that is being released.
 *
 * Removes the handle id from the handles idr of the signals manager that
 * belongs to the device's compute context (under the manager lock) and then
 * frees the handle itself.
 */
void hl_encaps_handle_do_release(struct kref *ref)
{
	struct hl_cs_encaps_sig_handle *sig_handle;
	struct hl_encaps_signals_mgr *sig_mgr;

	sig_handle = container_of(ref, struct hl_cs_encaps_sig_handle,
					refcount);
	sig_mgr = &sig_handle->hdev->compute_ctx->sig_mgr;

	/* The idr is shared, so the id is dropped under the manager lock */
	spin_lock(&sig_mgr->lock);
	idr_remove(&sig_mgr->handles, sig_handle->id);
	spin_unlock(&sig_mgr->lock);

	kfree(sig_handle);
}
/*
 * hl_encaps_handle_do_release_sob - kref release callback of an encapsulated
 *                                   signals handle that also drops the
 *                                   handle's hw_sob reference
 *
 * @ref: the refcount member embedded in the handle that is being released.
 *
 * Same teardown as the plain release callback (remove the id from the
 * manager's idr under its lock, free the handle), preceded by a put on the
 * hw_sob the handle points to.
 */
static void hl_encaps_handle_do_release_sob(struct kref *ref)
{
	struct hl_cs_encaps_sig_handle *sig_handle;
	struct hl_encaps_signals_mgr *sig_mgr;

	sig_handle = container_of(ref, struct hl_cs_encaps_sig_handle,
					refcount);
	sig_mgr = &sig_handle->hdev->compute_ctx->sig_mgr;

	/* Reaching this callback means signals were reserved but no CS with
	 * encaps signals was ever submitted for them, so the hw_sob reference
	 * that was taken at reservation time has to be returned here.
	 */
	hw_sob_put(sig_handle->hw_sob);

	spin_lock(&sig_mgr->lock);
	idr_remove(&sig_mgr->handles, sig_handle->id);
	spin_unlock(&sig_mgr->lock);

	kfree(sig_handle);
}
/*
 * hl_encaps_sig_mgr_init - initialize an encapsulated signals manager
 *
 * @mgr: the manager to initialize.
 *
 * Sets up the empty handles idr and the spinlock that protects it.
 */
static void hl_encaps_sig_mgr_init(struct hl_encaps_signals_mgr *mgr)
{
	idr_init(&mgr->handles);
	spin_lock_init(&mgr->lock);
}
/*
 * hl_encaps_sig_mgr_fini - tear down an encapsulated signals manager
 *
 * @hdev: pointer to the device structure (used for the warning print).
 * @mgr: the manager to tear down.
 *
 * If handles are still registered in the idr, a warning is printed and a
 * reference is put on every one of them, using the release callback that
 * also returns the hw_sob reference. The idr is destroyed afterwards.
 */
static void hl_encaps_sig_mgr_fini(struct hl_device *hdev,
					struct hl_encaps_signals_mgr *mgr)
{
	struct idr *handles = &mgr->handles;
	struct hl_cs_encaps_sig_handle *leftover;
	u32 handle_id;

	if (!idr_is_empty(handles)) {
		dev_warn(hdev->dev, "device released while some encaps signals handles are still allocated\n");

		idr_for_each_entry(handles, leftover, handle_id)
			kref_put(&leftover->refcount,
					hl_encaps_handle_do_release_sob);
	}

	idr_destroy(handles);
}
static void hl_ctx_fini(struct hl_ctx *ctx) static void hl_ctx_fini(struct hl_ctx *ctx)
{ {
struct hl_device *hdev = ctx->hdev; struct hl_device *hdev = ctx->hdev;
int i; int i;
/* Release all allocated pending cb's, those cb's were never
* scheduled so it is safe to release them here
*/
hl_pending_cb_list_flush(ctx);
/* Release all allocated HW block mapped list entries and destroy /* Release all allocated HW block mapped list entries and destroy
* the mutex. * the mutex.
*/ */
...@@ -53,6 +107,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx) ...@@ -53,6 +107,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
hl_cb_va_pool_fini(ctx); hl_cb_va_pool_fini(ctx);
hl_vm_ctx_fini(ctx); hl_vm_ctx_fini(ctx);
hl_asid_free(hdev, ctx->asid); hl_asid_free(hdev, ctx->asid);
hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr);
/* Scrub both SRAM and DRAM */ /* Scrub both SRAM and DRAM */
hdev->asic_funcs->scrub_device_mem(hdev, 0, 0); hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
...@@ -130,9 +185,6 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx) ...@@ -130,9 +185,6 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
{ {
if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1) if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1)
return; return;
dev_warn(hdev->dev,
"user process released device but its command submissions are still executing\n");
} }
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
...@@ -144,11 +196,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) ...@@ -144,11 +196,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
kref_init(&ctx->refcount); kref_init(&ctx->refcount);
ctx->cs_sequence = 1; ctx->cs_sequence = 1;
INIT_LIST_HEAD(&ctx->pending_cb_list);
spin_lock_init(&ctx->pending_cb_lock);
spin_lock_init(&ctx->cs_lock); spin_lock_init(&ctx->cs_lock);
atomic_set(&ctx->thread_ctx_switch_token, 1); atomic_set(&ctx->thread_ctx_switch_token, 1);
atomic_set(&ctx->thread_pending_cb_token, 1);
ctx->thread_ctx_switch_wait_token = 0; ctx->thread_ctx_switch_wait_token = 0;
ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs, ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
sizeof(struct hl_fence *), sizeof(struct hl_fence *),
...@@ -200,6 +249,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) ...@@ -200,6 +249,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
goto err_cb_va_pool_fini; goto err_cb_va_pool_fini;
} }
hl_encaps_sig_mgr_init(&ctx->sig_mgr);
dev_dbg(hdev->dev, "create user context %d\n", ctx->asid); dev_dbg(hdev->dev, "create user context %d\n", ctx->asid);
} }
...@@ -229,31 +280,86 @@ int hl_ctx_put(struct hl_ctx *ctx) ...@@ -229,31 +280,86 @@ int hl_ctx_put(struct hl_ctx *ctx)
return kref_put(&ctx->refcount, hl_ctx_do_release); return kref_put(&ctx->refcount, hl_ctx_do_release);
} }
struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq) /*
* hl_ctx_get_fence_locked - get CS fence under CS lock
*
* @ctx: pointer to the context structure.
* @seq: CS sequences number
*
* @return valid fence pointer on success, NULL if fence is gone, otherwise
* error pointer.
*
* NOTE: this function shall be called with cs_lock locked
*/
static struct hl_fence *hl_ctx_get_fence_locked(struct hl_ctx *ctx, u64 seq)
{ {
struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop; struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
struct hl_fence *fence; struct hl_fence *fence;
spin_lock(&ctx->cs_lock); if (seq >= ctx->cs_sequence)
if (seq >= ctx->cs_sequence) {
spin_unlock(&ctx->cs_lock);
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
}
if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) { if (seq + asic_prop->max_pending_cs < ctx->cs_sequence)
spin_unlock(&ctx->cs_lock);
return NULL; return NULL;
}
fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]; fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)];
hl_fence_get(fence); hl_fence_get(fence);
return fence;
}
struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
{
struct hl_fence *fence;
spin_lock(&ctx->cs_lock);
fence = hl_ctx_get_fence_locked(ctx, seq);
spin_unlock(&ctx->cs_lock); spin_unlock(&ctx->cs_lock);
return fence; return fence;
} }
/*
 * hl_ctx_get_fences - get multiple CS fences under the same CS lock
 *
 * @ctx: pointer to the context structure.
 * @seq_arr: array of CS sequence numbers to look up.
 * @fence: output array; entry i receives the fence of seq_arr[i].
 * @arr_len: number of entries in both seq_arr and fence.
 *
 * All lookups are done while holding ctx->cs_lock once. The walk stops at the
 * first sequence whose lookup returns an error pointer; in that case the
 * entries that were filled before the failing one are handed to
 * hl_fences_put() after the lock is dropped.
 *
 * @return 0 on success, otherwise the error code of the failing lookup.
 */
int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr,
				struct hl_fence **fence, u32 arr_len)
{
	int idx, rc = 0;

	spin_lock(&ctx->cs_lock);

	for (idx = 0; idx < arr_len; idx++) {
		u64 seq = seq_arr[idx];

		fence[idx] = hl_ctx_get_fence_locked(ctx, seq);
		if (!IS_ERR(fence[idx]))
			continue;

		dev_err(ctx->hdev->dev,
			"Failed to get fence for CS with seq 0x%llx\n",
			seq);
		rc = PTR_ERR(fence[idx]);
		break;
	}

	spin_unlock(&ctx->cs_lock);

	/* On failure idx is the failing slot, so only slots [0, idx) are put */
	if (rc)
		hl_fences_put(fence, idx);

	return rc;
}
/* /*
* hl_ctx_mgr_init - initialize the context manager * hl_ctx_mgr_init - initialize the context manager
* *
......
...@@ -209,12 +209,12 @@ static int userptr_show(struct seq_file *s, void *data) ...@@ -209,12 +209,12 @@ static int userptr_show(struct seq_file *s, void *data)
if (first) { if (first) {
first = false; first = false;
seq_puts(s, "\n"); seq_puts(s, "\n");
seq_puts(s, " user virtual address size dma dir\n"); seq_puts(s, " pid user virtual address size dma dir\n");
seq_puts(s, "----------------------------------------------------------\n"); seq_puts(s, "----------------------------------------------------------\n");
} }
seq_printf(s, seq_printf(s, " %-7d 0x%-14llx %-10llu %-30s\n",
" 0x%-14llx %-10u %-30s\n", userptr->pid, userptr->addr, userptr->size,
userptr->addr, userptr->size, dma_dir[userptr->dir]); dma_dir[userptr->dir]);
} }
spin_unlock(&dev_entry->userptr_spinlock); spin_unlock(&dev_entry->userptr_spinlock);
...@@ -235,7 +235,7 @@ static int vm_show(struct seq_file *s, void *data) ...@@ -235,7 +235,7 @@ static int vm_show(struct seq_file *s, void *data)
struct hl_vm_hash_node *hnode; struct hl_vm_hash_node *hnode;
struct hl_userptr *userptr; struct hl_userptr *userptr;
struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
enum vm_type_t *vm_type; enum vm_type *vm_type;
bool once = true; bool once = true;
u64 j; u64 j;
int i; int i;
...@@ -261,7 +261,7 @@ static int vm_show(struct seq_file *s, void *data) ...@@ -261,7 +261,7 @@ static int vm_show(struct seq_file *s, void *data)
if (*vm_type == VM_TYPE_USERPTR) { if (*vm_type == VM_TYPE_USERPTR) {
userptr = hnode->ptr; userptr = hnode->ptr;
seq_printf(s, seq_printf(s,
" 0x%-14llx %-10u\n", " 0x%-14llx %-10llu\n",
hnode->vaddr, userptr->size); hnode->vaddr, userptr->size);
} else { } else {
phys_pg_pack = hnode->ptr; phys_pg_pack = hnode->ptr;
...@@ -320,6 +320,77 @@ static int vm_show(struct seq_file *s, void *data) ...@@ -320,6 +320,77 @@ static int vm_show(struct seq_file *s, void *data)
return 0; return 0;
} }
/*
 * userptr_lookup_show - seq_file show callback of the userptr_lookup node
 *
 * @s: the seq_file to print into; its private data is the debugfs entry.
 * @data: unused.
 *
 * Walks the list of user pointers under userptr_spinlock. For every region
 * whose [addr, addr + size) range contains dev_entry->userptr_lookup, the
 * region's scatter-gather entries are scanned and, for the entry that covers
 * the looked-up address, one line is printed with the address, the matching
 * DMA address (entry DMA address plus the offset inside the entry), the pid
 * and the region start and size. A table header is printed before the first
 * hit only.
 *
 * @return always 0.
 */
static int userptr_lookup_show(struct seq_file *s, void *data)
{
	struct hl_debugfs_entry *entry = s->private;
	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
	u64 pages_before, sg_pages, chunk_start, chunk_end;
	struct hl_userptr *userptr;
	struct scatterlist *sg;
	bool header_pending = true;
	dma_addr_t dma_addr;
	int i;

	spin_lock(&dev_entry->userptr_spinlock);

	list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {
		/* Skip regions that do not contain the looked-up address */
		if (dev_entry->userptr_lookup < userptr->addr ||
				dev_entry->userptr_lookup >=
					userptr->addr + userptr->size)
			continue;

		pages_before = 0;

		for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
			/* NOTE(review): hl_get_sg_info() is used here as
			 * "returns the page count of the entry and stores its
			 * DMA address" - confirm against its definition.
			 */
			sg_pages = hl_get_sg_info(sg, &dma_addr);

			chunk_start = userptr->addr +
					pages_before * PAGE_SIZE;
			chunk_end = userptr->addr +
					(pages_before + sg_pages) * PAGE_SIZE;
			pages_before += sg_pages;

			if (dev_entry->userptr_lookup < chunk_start ||
					dev_entry->userptr_lookup >= chunk_end)
				continue;

			/* Add the offset of the address inside this entry */
			dma_addr += (dev_entry->userptr_lookup - chunk_start);

			if (header_pending) {
				header_pending = false;
				seq_puts(s, "\n");
				seq_puts(s, " user virtual address dma address pid region start region size\n");
				seq_puts(s, "---------------------------------------------------------------------------------------\n");
			}

			seq_printf(s, " 0x%-18llx 0x%-16llx %-8u 0x%-16llx %-12llu\n",
				dev_entry->userptr_lookup,
				(u64)dma_addr, userptr->pid,
				userptr->addr, userptr->size);
		}
	}

	spin_unlock(&dev_entry->userptr_spinlock);

	/* Close the table only if something was printed */
	if (!header_pending)
		seq_puts(s, "\n");

	return 0;
}
/*
 * userptr_lookup_write - write callback of the userptr_lookup node
 *
 * @file: the opened debugfs file; its private data is the seq_file.
 * @buf: user buffer holding the address as a hexadecimal string.
 * @count: number of bytes in @buf.
 * @f_pos: unused.
 *
 * Parses the user string as a base-16 u64 and stores it as the address that
 * the next read of the node will look up.
 *
 * @return @count on success, otherwise the parsing error code.
 */
static ssize_t userptr_lookup_write(struct file *file, const char __user *buf,
		size_t count, loff_t *f_pos)
{
	struct seq_file *s = file->private_data;
	struct hl_debugfs_entry *entry = s->private;
	u64 lookup_addr;
	ssize_t rc;

	rc = kstrtoull_from_user(buf, count, 16, &lookup_addr);
	if (rc)
		return rc;

	entry->dev_entry->userptr_lookup = lookup_addr;

	return count;
}
static int mmu_show(struct seq_file *s, void *data) static int mmu_show(struct seq_file *s, void *data)
{ {
struct hl_debugfs_entry *entry = s->private; struct hl_debugfs_entry *entry = s->private;
...@@ -349,7 +420,7 @@ static int mmu_show(struct seq_file *s, void *data) ...@@ -349,7 +420,7 @@ static int mmu_show(struct seq_file *s, void *data)
return 0; return 0;
} }
phys_addr = hops_info.hop_info[hops_info.used_hops - 1].hop_pte_val; hl_mmu_va_to_pa(ctx, virt_addr, &phys_addr);
if (hops_info.scrambled_vaddr && if (hops_info.scrambled_vaddr &&
(dev_entry->mmu_addr != hops_info.scrambled_vaddr)) (dev_entry->mmu_addr != hops_info.scrambled_vaddr))
...@@ -491,11 +562,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size, ...@@ -491,11 +562,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size,
struct hl_vm_phys_pg_pack *phys_pg_pack; struct hl_vm_phys_pg_pack *phys_pg_pack;
struct hl_ctx *ctx = hdev->compute_ctx; struct hl_ctx *ctx = hdev->compute_ctx;
struct hl_vm_hash_node *hnode; struct hl_vm_hash_node *hnode;
u64 end_address, range_size;
struct hl_userptr *userptr; struct hl_userptr *userptr;
enum vm_type_t *vm_type; enum vm_type *vm_type;
bool valid = false; bool valid = false;
u64 end_address;
u32 range_size;
int i, rc = 0; int i, rc = 0;
if (!ctx) { if (!ctx) {
...@@ -1043,6 +1113,60 @@ static ssize_t hl_security_violations_read(struct file *f, char __user *buf, ...@@ -1043,6 +1113,60 @@ static ssize_t hl_security_violations_read(struct file *f, char __user *buf,
return 0; return 0;
} }
/*
 * hl_state_dump_read - read callback of the state_dump node
 *
 * @f: the opened debugfs file; the inode private data is the debugfs
 *     device entry.
 * @buf: user buffer to copy the dump into.
 * @count: size of @buf.
 * @ppos: file position, advanced by the copy.
 *
 * Under the state dump semaphore (taken for read), copies the dump stored at
 * the current head slot to the user. The dump is treated as a NUL-terminated
 * string, so its length is taken with strlen().
 *
 * @return the simple_read_from_buffer() result, or 0 if the head slot is
 * empty.
 */
static ssize_t hl_state_dump_read(struct file *f, char __user *buf,
					size_t count, loff_t *ppos)
{
	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
	const char *dump;
	ssize_t rc = 0;

	down_read(&entry->state_dump_sem);

	dump = entry->state_dump[entry->state_dump_head];
	if (dump)
		rc = simple_read_from_buffer(buf, count, ppos, dump,
						strlen(dump));

	up_read(&entry->state_dump_sem);

	return rc;
}
/*
 * hl_state_dump_write - write callback of the state_dump node
 *
 * @f: the opened debugfs file; the inode private data is the debugfs
 *     device entry.
 * @buf: user buffer holding the number of dumps to discard, as a decimal
 *       string.
 * @count: number of bytes in @buf.
 * @ppos: unused.
 *
 * Discards the requested number of state dumps, starting from the newest one
 * (the head slot) and moving the head backwards with wrap-around. The number
 * must be at least 1 and smaller than the number of slots. Nothing is
 * discarded if the head slot is empty.
 *
 * The head slot is inspected only after the semaphore is taken for write, so
 * the check cannot race with a new dump being registered.
 *
 * @return @count on success, the parsing error code if @buf is not a valid
 * unsigned decimal, or -EINVAL if the number is out of range.
 */
static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,
					size_t count, loff_t *ppos)
{
	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
	struct hl_device *hdev = entry->hdev;
	ssize_t rc;
	u32 size, i;

	rc = kstrtouint_from_user(buf, count, 10, &size);
	if (rc)
		return rc;

	if (!size || size >= ARRAY_SIZE(entry->state_dump)) {
		dev_err(hdev->dev, "Invalid number of dumps to skip\n");
		return -EINVAL;
	}

	down_write(&entry->state_dump_sem);

	if (entry->state_dump[entry->state_dump_head]) {
		for (i = 0; i < size; ++i) {
			vfree(entry->state_dump[entry->state_dump_head]);
			entry->state_dump[entry->state_dump_head] = NULL;

			/* Step back to the previous (older) slot, wrapping */
			if (entry->state_dump_head > 0)
				entry->state_dump_head--;
			else
				entry->state_dump_head =
					ARRAY_SIZE(entry->state_dump) - 1;
		}
	}

	up_write(&entry->state_dump_sem);

	return count;
}
static const struct file_operations hl_data32b_fops = { static const struct file_operations hl_data32b_fops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.read = hl_data_read32, .read = hl_data_read32,
...@@ -1110,12 +1234,19 @@ static const struct file_operations hl_security_violations_fops = { ...@@ -1110,12 +1234,19 @@ static const struct file_operations hl_security_violations_fops = {
.read = hl_security_violations_read .read = hl_security_violations_read
}; };
/* File operations of the "state_dump" debugfs node: read returns the dump at
 * the head slot, write discards dumps.
 */
static const struct file_operations hl_state_dump_fops = {
	.owner = THIS_MODULE,
	.write = hl_state_dump_write,
	.read = hl_state_dump_read,
};
static const struct hl_info_list hl_debugfs_list[] = { static const struct hl_info_list hl_debugfs_list[] = {
{"command_buffers", command_buffers_show, NULL}, {"command_buffers", command_buffers_show, NULL},
{"command_submission", command_submission_show, NULL}, {"command_submission", command_submission_show, NULL},
{"command_submission_jobs", command_submission_jobs_show, NULL}, {"command_submission_jobs", command_submission_jobs_show, NULL},
{"userptr", userptr_show, NULL}, {"userptr", userptr_show, NULL},
{"vm", vm_show, NULL}, {"vm", vm_show, NULL},
{"userptr_lookup", userptr_lookup_show, userptr_lookup_write},
{"mmu", mmu_show, mmu_asid_va_write}, {"mmu", mmu_show, mmu_asid_va_write},
{"engines", engines_show, NULL} {"engines", engines_show, NULL}
}; };
...@@ -1172,6 +1303,7 @@ void hl_debugfs_add_device(struct hl_device *hdev) ...@@ -1172,6 +1303,7 @@ void hl_debugfs_add_device(struct hl_device *hdev)
INIT_LIST_HEAD(&dev_entry->userptr_list); INIT_LIST_HEAD(&dev_entry->userptr_list);
INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list); INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list);
mutex_init(&dev_entry->file_mutex); mutex_init(&dev_entry->file_mutex);
init_rwsem(&dev_entry->state_dump_sem);
spin_lock_init(&dev_entry->cb_spinlock); spin_lock_init(&dev_entry->cb_spinlock);
spin_lock_init(&dev_entry->cs_spinlock); spin_lock_init(&dev_entry->cs_spinlock);
spin_lock_init(&dev_entry->cs_job_spinlock); spin_lock_init(&dev_entry->cs_job_spinlock);
...@@ -1283,6 +1415,12 @@ void hl_debugfs_add_device(struct hl_device *hdev) ...@@ -1283,6 +1415,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry->root, dev_entry->root,
&hdev->skip_reset_on_timeout); &hdev->skip_reset_on_timeout);
debugfs_create_file("state_dump",
0600,
dev_entry->root,
dev_entry,
&hl_state_dump_fops);
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
debugfs_create_file(hl_debugfs_list[i].name, debugfs_create_file(hl_debugfs_list[i].name,
0444, 0444,
...@@ -1297,6 +1435,7 @@ void hl_debugfs_add_device(struct hl_device *hdev) ...@@ -1297,6 +1435,7 @@ void hl_debugfs_add_device(struct hl_device *hdev)
void hl_debugfs_remove_device(struct hl_device *hdev) void hl_debugfs_remove_device(struct hl_device *hdev)
{ {
struct hl_dbg_device_entry *entry = &hdev->hl_debugfs; struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
int i;
debugfs_remove_recursive(entry->root); debugfs_remove_recursive(entry->root);
...@@ -1304,6 +1443,9 @@ void hl_debugfs_remove_device(struct hl_device *hdev) ...@@ -1304,6 +1443,9 @@ void hl_debugfs_remove_device(struct hl_device *hdev)
vfree(entry->blob_desc.data); vfree(entry->blob_desc.data);
for (i = 0; i < ARRAY_SIZE(entry->state_dump); ++i)
vfree(entry->state_dump[i]);
kfree(entry->entry_arr); kfree(entry->entry_arr);
} }
...@@ -1416,6 +1558,28 @@ void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx) ...@@ -1416,6 +1558,28 @@ void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
spin_unlock(&dev_entry->ctx_mem_hash_spinlock); spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
} }
/**
* hl_debugfs_set_state_dump - register state dump making it accessible via
* debugfs
* @hdev: pointer to the device structure
* @data: the actual dump data
* @length: the length of the data
*/
void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
unsigned long length)
{
struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
down_write(&dev_entry->state_dump_sem);
dev_entry->state_dump_head = (dev_entry->state_dump_head + 1) %
ARRAY_SIZE(dev_entry->state_dump);
vfree(dev_entry->state_dump[dev_entry->state_dump_head]);
dev_entry->state_dump[dev_entry->state_dump_head] = data;
up_write(&dev_entry->state_dump_sem);
}
void __init hl_debugfs_init(void) void __init hl_debugfs_init(void)
{ {
hl_debug_root = debugfs_create_dir("habanalabs", NULL); hl_debug_root = debugfs_create_dir("habanalabs", NULL);
......
...@@ -7,11 +7,11 @@ ...@@ -7,11 +7,11 @@
#define pr_fmt(fmt) "habanalabs: " fmt #define pr_fmt(fmt) "habanalabs: " fmt
#include <uapi/misc/habanalabs.h>
#include "habanalabs.h" #include "habanalabs.h"
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/hwmon.h> #include <linux/hwmon.h>
#include <uapi/misc/habanalabs.h>
enum hl_device_status hl_device_status(struct hl_device *hdev) enum hl_device_status hl_device_status(struct hl_device *hdev)
{ {
...@@ -23,6 +23,8 @@ enum hl_device_status hl_device_status(struct hl_device *hdev) ...@@ -23,6 +23,8 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
status = HL_DEVICE_STATUS_NEEDS_RESET; status = HL_DEVICE_STATUS_NEEDS_RESET;
else if (hdev->disabled) else if (hdev->disabled)
status = HL_DEVICE_STATUS_MALFUNCTION; status = HL_DEVICE_STATUS_MALFUNCTION;
else if (!hdev->init_done)
status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
else else
status = HL_DEVICE_STATUS_OPERATIONAL; status = HL_DEVICE_STATUS_OPERATIONAL;
...@@ -44,6 +46,7 @@ bool hl_device_operational(struct hl_device *hdev, ...@@ -44,6 +46,7 @@ bool hl_device_operational(struct hl_device *hdev,
case HL_DEVICE_STATUS_NEEDS_RESET: case HL_DEVICE_STATUS_NEEDS_RESET:
return false; return false;
case HL_DEVICE_STATUS_OPERATIONAL: case HL_DEVICE_STATUS_OPERATIONAL:
case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
default: default:
return true; return true;
} }
...@@ -129,8 +132,8 @@ static int hl_device_release(struct inode *inode, struct file *filp) ...@@ -129,8 +132,8 @@ static int hl_device_release(struct inode *inode, struct file *filp)
hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr); hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
if (!hl_hpriv_put(hpriv)) if (!hl_hpriv_put(hpriv))
dev_warn(hdev->dev, dev_notice(hdev->dev,
"Device is still in use because there are live CS and/or memory mappings\n"); "User process closed FD but device still in use\n");
hdev->last_open_session_duration_jif = hdev->last_open_session_duration_jif =
jiffies - hdev->last_successful_open_jif; jiffies - hdev->last_successful_open_jif;
...@@ -308,9 +311,15 @@ static void device_hard_reset_pending(struct work_struct *work) ...@@ -308,9 +311,15 @@ static void device_hard_reset_pending(struct work_struct *work)
container_of(work, struct hl_device_reset_work, container_of(work, struct hl_device_reset_work,
reset_work.work); reset_work.work);
struct hl_device *hdev = device_reset_work->hdev; struct hl_device *hdev = device_reset_work->hdev;
u32 flags;
int rc; int rc;
rc = hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD); flags = HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD;
if (device_reset_work->fw_reset)
flags |= HL_RESET_FW;
rc = hl_device_reset(hdev, flags);
if ((rc == -EBUSY) && !hdev->device_fini_pending) { if ((rc == -EBUSY) && !hdev->device_fini_pending) {
dev_info(hdev->dev, dev_info(hdev->dev,
"Could not reset device. will try again in %u seconds", "Could not reset device. will try again in %u seconds",
...@@ -682,6 +691,44 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable) ...@@ -682,6 +691,44 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
return rc; return rc;
} }
/*
 * take_release_locks - wait for current holders of the device locks to leave
 *
 * @hdev: pointer to the device structure.
 *
 * Each lock below is acquired and released right away. Nothing is done while
 * holding it; the point is only that the acquire cannot succeed until whoever
 * is currently inside that critical section has left it.
 */
static void take_release_locks(struct hl_device *hdev)
{
/* Flush anyone that is inside the critical section of enqueue
* jobs to the H/W
*/
hdev->asic_funcs->hw_queues_lock(hdev);
hdev->asic_funcs->hw_queues_unlock(hdev);
/* Flush processes that are sending message to CPU */
mutex_lock(&hdev->send_cpu_message_lock);
mutex_unlock(&hdev->send_cpu_message_lock);
/* Flush anyone that is inside device open */
mutex_lock(&hdev->fpriv_list_lock);
mutex_unlock(&hdev->fpriv_list_lock);
}
/*
 * cleanup_resources - stop the H/W and release what is still pending on it
 *
 * @hdev: pointer to the device structure.
 * @hard_reset: true for a hard reset; also triggers the late-fini step.
 * @fw_reset: passed through to the ASIC halt_engines callback.
 *
 * The steps run in this order: optional late fini, halting the engines,
 * rolling back all command submissions, releasing pending user interrupts.
 */
static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
/* Late fini is done only as part of a hard reset */
if (hard_reset)
device_late_fini(hdev);
/*
* Halt the engines and disable interrupts so we won't get any more
* completions from H/W and we won't have any accesses from the
* H/W to the host machine
*/
hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset);
/* Go over all the queues, release all CS and their jobs */
hl_cs_rollback_all(hdev);
/* Release all pending user interrupts, each pending user interrupt
* holds a reference to user context
*/
hl_release_pending_user_interrupts(hdev);
}
/* /*
* hl_device_suspend - initiate device suspend * hl_device_suspend - initiate device suspend
* *
...@@ -707,16 +754,7 @@ int hl_device_suspend(struct hl_device *hdev) ...@@ -707,16 +754,7 @@ int hl_device_suspend(struct hl_device *hdev)
/* This blocks all other stuff that is not blocked by in_reset */ /* This blocks all other stuff that is not blocked by in_reset */
hdev->disabled = true; hdev->disabled = true;
/* take_release_locks(hdev);
* Flush anyone that is inside the critical section of enqueue
* jobs to the H/W
*/
hdev->asic_funcs->hw_queues_lock(hdev);
hdev->asic_funcs->hw_queues_unlock(hdev);
/* Flush processes that are sending message to CPU */
mutex_lock(&hdev->send_cpu_message_lock);
mutex_unlock(&hdev->send_cpu_message_lock);
rc = hdev->asic_funcs->suspend(hdev); rc = hdev->asic_funcs->suspend(hdev);
if (rc) if (rc)
...@@ -819,6 +857,11 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) ...@@ -819,6 +857,11 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
usleep_range(1000, 10000); usleep_range(1000, 10000);
put_task_struct(task); put_task_struct(task);
} else {
dev_warn(hdev->dev,
"Can't get task struct for PID so giving up on killing process\n");
mutex_unlock(&hdev->fpriv_list_lock);
return -ETIME;
} }
} }
...@@ -885,7 +928,7 @@ static void device_disable_open_processes(struct hl_device *hdev) ...@@ -885,7 +928,7 @@ static void device_disable_open_processes(struct hl_device *hdev)
int hl_device_reset(struct hl_device *hdev, u32 flags) int hl_device_reset(struct hl_device *hdev, u32 flags)
{ {
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
bool hard_reset, from_hard_reset_thread, hard_instead_soft = false; bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false;
int i, rc; int i, rc;
if (!hdev->init_done) { if (!hdev->init_done) {
...@@ -894,8 +937,9 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) ...@@ -894,8 +937,9 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
return 0; return 0;
} }
hard_reset = (flags & HL_RESET_HARD) != 0; hard_reset = !!(flags & HL_RESET_HARD);
from_hard_reset_thread = (flags & HL_RESET_FROM_RESET_THREAD) != 0; from_hard_reset_thread = !!(flags & HL_RESET_FROM_RESET_THREAD);
fw_reset = !!(flags & HL_RESET_FW);
if (!hard_reset && !hdev->supports_soft_reset) { if (!hard_reset && !hdev->supports_soft_reset) {
hard_instead_soft = true; hard_instead_soft = true;
...@@ -947,11 +991,13 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) ...@@ -947,11 +991,13 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
else else
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
/* /* If reset is due to heartbeat, device CPU is no responsive in
* if reset is due to heartbeat, device CPU is no responsive in * which case no point sending PCI disable message to it.
* which case no point sending PCI disable message to it *
* If F/W is performing the reset, no need to send it a message to disable
* PCI access
*/ */
if (hard_reset && !(flags & HL_RESET_HEARTBEAT)) { if (hard_reset && !(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
/* Disable PCI access from device F/W so he won't send /* Disable PCI access from device F/W so he won't send
* us additional interrupts. We disable MSI/MSI-X at * us additional interrupts. We disable MSI/MSI-X at
* the halt_engines function and we can't have the F/W * the halt_engines function and we can't have the F/W
...@@ -970,15 +1016,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) ...@@ -970,15 +1016,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
/* This also blocks future CS/VM/JOB completion operations */ /* This also blocks future CS/VM/JOB completion operations */
hdev->disabled = true; hdev->disabled = true;
/* Flush anyone that is inside the critical section of enqueue take_release_locks(hdev);
* jobs to the H/W
*/
hdev->asic_funcs->hw_queues_lock(hdev);
hdev->asic_funcs->hw_queues_unlock(hdev);
/* Flush anyone that is inside device open */
mutex_lock(&hdev->fpriv_list_lock);
mutex_unlock(&hdev->fpriv_list_lock);
dev_err(hdev->dev, "Going to RESET device!\n"); dev_err(hdev->dev, "Going to RESET device!\n");
} }
...@@ -989,6 +1027,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) ...@@ -989,6 +1027,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
hdev->process_kill_trial_cnt = 0; hdev->process_kill_trial_cnt = 0;
hdev->device_reset_work.fw_reset = fw_reset;
/* /*
* Because the reset function can't run from heartbeat work, * Because the reset function can't run from heartbeat work,
* we need to call the reset function from a dedicated work. * we need to call the reset function from a dedicated work.
...@@ -999,31 +1039,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) ...@@ -999,31 +1039,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
return 0; return 0;
} }
if (hard_reset) { cleanup_resources(hdev, hard_reset, fw_reset);
device_late_fini(hdev);
/*
* Now that the heartbeat thread is closed, flush processes
* which are sending messages to CPU
*/
mutex_lock(&hdev->send_cpu_message_lock);
mutex_unlock(&hdev->send_cpu_message_lock);
}
/*
* Halt the engines and disable interrupts so we won't get any more
* completions from H/W and we won't have any accesses from the
* H/W to the host machine
*/
hdev->asic_funcs->halt_engines(hdev, hard_reset);
/* Go over all the queues, release all CS and their jobs */
hl_cs_rollback_all(hdev);
/* Release all pending user interrupts, each pending user interrupt
* holds a reference to user context
*/
hl_release_pending_user_interrupts(hdev);
kill_processes: kill_processes:
if (hard_reset) { if (hard_reset) {
...@@ -1057,12 +1073,15 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) ...@@ -1057,12 +1073,15 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
} }
/* Reset the H/W. It will be in idle state after this returns */ /* Reset the H/W. It will be in idle state after this returns */
hdev->asic_funcs->hw_fini(hdev, hard_reset); hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
if (hard_reset) { if (hard_reset) {
hdev->fw_loader.linux_loaded = false;
/* Release kernel context */ /* Release kernel context */
if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1) if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
hdev->kernel_ctx = NULL; hdev->kernel_ctx = NULL;
hl_vm_fini(hdev); hl_vm_fini(hdev);
hl_mmu_fini(hdev); hl_mmu_fini(hdev);
hl_eq_reset(hdev, &hdev->event_queue); hl_eq_reset(hdev, &hdev->event_queue);
...@@ -1292,6 +1311,10 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) ...@@ -1292,6 +1311,10 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
if (rc) if (rc)
goto user_interrupts_fini; goto user_interrupts_fini;
/* initialize completion structure for multi CS wait */
hl_multi_cs_completion_init(hdev);
/* /*
* Initialize the H/W queues. Must be done before hw_init, because * Initialize the H/W queues. Must be done before hw_init, because
* there the addresses of the kernel queue are being written to the * there the addresses of the kernel queue are being written to the
...@@ -1361,6 +1384,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) ...@@ -1361,6 +1384,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
hdev->compute_ctx = NULL; hdev->compute_ctx = NULL;
hdev->asic_funcs->state_dump_init(hdev);
hl_debugfs_add_device(hdev); hl_debugfs_add_device(hdev);
/* debugfs nodes are created in hl_ctx_init so it must be called after /* debugfs nodes are created in hl_ctx_init so it must be called after
...@@ -1567,31 +1592,13 @@ void hl_device_fini(struct hl_device *hdev) ...@@ -1567,31 +1592,13 @@ void hl_device_fini(struct hl_device *hdev)
/* Mark device as disabled */ /* Mark device as disabled */
hdev->disabled = true; hdev->disabled = true;
/* Flush anyone that is inside the critical section of enqueue take_release_locks(hdev);
* jobs to the H/W
*/
hdev->asic_funcs->hw_queues_lock(hdev);
hdev->asic_funcs->hw_queues_unlock(hdev);
/* Flush anyone that is inside device open */
mutex_lock(&hdev->fpriv_list_lock);
mutex_unlock(&hdev->fpriv_list_lock);
hdev->hard_reset_pending = true; hdev->hard_reset_pending = true;
hl_hwmon_fini(hdev); hl_hwmon_fini(hdev);
device_late_fini(hdev); cleanup_resources(hdev, true, false);
/*
* Halt the engines and disable interrupts so we won't get any more
* completions from H/W and we won't have any accesses from the
* H/W to the host machine
*/
hdev->asic_funcs->halt_engines(hdev, true);
/* Go over all the queues, release all CS and their jobs */
hl_cs_rollback_all(hdev);
/* Kill processes here after CS rollback. This is because the process /* Kill processes here after CS rollback. This is because the process
* can't really exit until all its CSs are done, which is what we * can't really exit until all its CSs are done, which is what we
...@@ -1610,7 +1617,9 @@ void hl_device_fini(struct hl_device *hdev) ...@@ -1610,7 +1617,9 @@ void hl_device_fini(struct hl_device *hdev)
hl_cb_pool_fini(hdev); hl_cb_pool_fini(hdev);
/* Reset the H/W. It will be in idle state after this returns */ /* Reset the H/W. It will be in idle state after this returns */
hdev->asic_funcs->hw_fini(hdev, true); hdev->asic_funcs->hw_fini(hdev, true, false);
hdev->fw_loader.linux_loaded = false;
/* Release kernel context */ /* Release kernel context */
if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1)) if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* /*
* Copyright 2016-2019 HabanaLabs, Ltd. * Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved. * All Rights Reserved.
*/ */
...@@ -240,11 +240,15 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, ...@@ -240,11 +240,15 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
/* set fence to a non valid value */ /* set fence to a non valid value */
pkt->fence = cpu_to_le32(UINT_MAX); pkt->fence = cpu_to_le32(UINT_MAX);
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr); /*
if (rc) { * The CPU queue is a synchronous queue with an effective depth of
dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc); * a single entry (although it is allocated with room for multiple
goto out; * entries). We lock on it using 'send_cpu_message_lock' which
} * serializes accesses to the CPU queue.
* Which means that we don't need to lock the access to the entire H/W
* queues module when submitting a JOB to the CPU queue.
*/
hl_hw_queue_submit_bd(hdev, queue, 0, len, pkt_dma_addr);
if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN) if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
expected_ack_val = queue->pi; expected_ack_val = queue->pi;
...@@ -663,17 +667,15 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev, ...@@ -663,17 +667,15 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
hdev->event_queue.check_eqe_index = false; hdev->event_queue.check_eqe_index = false;
/* Read FW application security bits again */ /* Read FW application security bits again */
if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid) { if (prop->fw_cpu_boot_dev_sts0_valid) {
hdev->asic_prop.fw_app_cpu_boot_dev_sts0 = prop->fw_app_cpu_boot_dev_sts0 = RREG32(sts_boot_dev_sts0_reg);
RREG32(sts_boot_dev_sts0_reg); if (prop->fw_app_cpu_boot_dev_sts0 &
if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_EQ_INDEX_EN) CPU_BOOT_DEV_STS0_EQ_INDEX_EN)
hdev->event_queue.check_eqe_index = true; hdev->event_queue.check_eqe_index = true;
} }
if (hdev->asic_prop.fw_cpu_boot_dev_sts1_valid) if (prop->fw_cpu_boot_dev_sts1_valid)
hdev->asic_prop.fw_app_cpu_boot_dev_sts1 = prop->fw_app_cpu_boot_dev_sts1 = RREG32(sts_boot_dev_sts1_reg);
RREG32(sts_boot_dev_sts1_reg);
out: out:
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
...@@ -1008,6 +1010,11 @@ void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev) ...@@ -1008,6 +1010,11 @@ void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev)
} else { } else {
WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_GOTO_WFE); WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_GOTO_WFE);
msleep(static_loader->cpu_reset_wait_msec); msleep(static_loader->cpu_reset_wait_msec);
/* Must clear this register in order to prevent preboot
* from reading WFE after reboot
*/
WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_NA);
} }
hdev->device_cpu_is_halted = true; hdev->device_cpu_is_halted = true;
...@@ -1055,6 +1062,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) ...@@ -1055,6 +1062,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
dev_err(hdev->dev, dev_err(hdev->dev,
"Device boot progress - Thermal Sensor initialization failed\n"); "Device boot progress - Thermal Sensor initialization failed\n");
break; break;
case CPU_BOOT_STATUS_SECURITY_READY:
dev_err(hdev->dev,
"Device boot progress - Stuck in preboot after security initialization\n");
break;
default: default:
dev_err(hdev->dev, dev_err(hdev->dev,
"Device boot progress - Invalid status code %d\n", "Device boot progress - Invalid status code %d\n",
...@@ -1238,11 +1249,6 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev) ...@@ -1238,11 +1249,6 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
* b. Check whether hard reset is done by boot cpu * b. Check whether hard reset is done by boot cpu
* 3. FW application - a. Fetch fw application security status * 3. FW application - a. Fetch fw application security status
* b. Check whether hard reset is done by fw app * b. Check whether hard reset is done by fw app
*
* Preboot:
* Check security status bit (CPU_BOOT_DEV_STS0_ENABLED). If set, then-
* check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
* If set, then mark GIC controller to be disabled.
*/ */
prop->hard_reset_done_by_fw = prop->hard_reset_done_by_fw =
!!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN); !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
...@@ -1953,8 +1959,8 @@ static void hl_fw_dynamic_update_linux_interrupt_if(struct hl_device *hdev) ...@@ -1953,8 +1959,8 @@ static void hl_fw_dynamic_update_linux_interrupt_if(struct hl_device *hdev)
if (!hdev->asic_prop.gic_interrupts_enable && if (!hdev->asic_prop.gic_interrupts_enable &&
!(hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & !(hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN)) { CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN)) {
dyn_regs->gic_host_halt_irq = dyn_regs->gic_host_irq_ctrl; dyn_regs->gic_host_halt_irq = dyn_regs->gic_host_pi_upd_irq;
dyn_regs->gic_host_ints_irq = dyn_regs->gic_host_irq_ctrl; dyn_regs->gic_host_ints_irq = dyn_regs->gic_host_pi_upd_irq;
dev_warn(hdev->dev, dev_warn(hdev->dev,
"Using a single interrupt interface towards cpucp"); "Using a single interrupt interface towards cpucp");
...@@ -2122,8 +2128,7 @@ static void hl_fw_linux_update_state(struct hl_device *hdev, ...@@ -2122,8 +2128,7 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
/* Read FW application security bits */ /* Read FW application security bits */
if (prop->fw_cpu_boot_dev_sts0_valid) { if (prop->fw_cpu_boot_dev_sts0_valid) {
prop->fw_app_cpu_boot_dev_sts0 = prop->fw_app_cpu_boot_dev_sts0 = RREG32(cpu_boot_dev_sts0_reg);
RREG32(cpu_boot_dev_sts0_reg);
if (prop->fw_app_cpu_boot_dev_sts0 & if (prop->fw_app_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
...@@ -2143,8 +2148,7 @@ static void hl_fw_linux_update_state(struct hl_device *hdev, ...@@ -2143,8 +2148,7 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
} }
if (prop->fw_cpu_boot_dev_sts1_valid) { if (prop->fw_cpu_boot_dev_sts1_valid) {
prop->fw_app_cpu_boot_dev_sts1 = prop->fw_app_cpu_boot_dev_sts1 = RREG32(cpu_boot_dev_sts1_reg);
RREG32(cpu_boot_dev_sts1_reg);
dev_dbg(hdev->dev, dev_dbg(hdev->dev,
"Firmware application CPU status1 %#x\n", "Firmware application CPU status1 %#x\n",
...@@ -2235,6 +2239,10 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, ...@@ -2235,6 +2239,10 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
dev_info(hdev->dev, dev_info(hdev->dev,
"Loading firmware to device, may take some time...\n"); "Loading firmware to device, may take some time...\n");
/*
* In this stage, "cpu_dyn_regs" contains only LKD's hard coded values!
* It will be updated from FW after hl_fw_dynamic_request_descriptor().
*/
dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs; dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE, rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE,
......
This diff is collapsed.
...@@ -141,7 +141,7 @@ int hl_device_open(struct inode *inode, struct file *filp) ...@@ -141,7 +141,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
hl_cb_mgr_init(&hpriv->cb_mgr); hl_cb_mgr_init(&hpriv->cb_mgr);
hl_ctx_mgr_init(&hpriv->ctx_mgr); hl_ctx_mgr_init(&hpriv->ctx_mgr);
hpriv->taskpid = find_get_pid(current->pid); hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
mutex_lock(&hdev->fpriv_list_lock); mutex_lock(&hdev->fpriv_list_lock);
...@@ -194,7 +194,6 @@ int hl_device_open(struct inode *inode, struct file *filp) ...@@ -194,7 +194,6 @@ int hl_device_open(struct inode *inode, struct file *filp)
out_err: out_err:
mutex_unlock(&hdev->fpriv_list_lock); mutex_unlock(&hdev->fpriv_list_lock);
hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
filp->private_data = NULL; filp->private_data = NULL;
...@@ -318,12 +317,16 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, ...@@ -318,12 +317,16 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
hdev->asic_prop.fw_security_enabled = false; hdev->asic_prop.fw_security_enabled = false;
/* Assign status description string */ /* Assign status description string */
strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL],
"disabled", HL_STR_MAX); "operational", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET],
"in reset", HL_STR_MAX); "in reset", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
"disabled", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET],
"needs reset", HL_STR_MAX); "needs reset", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
"in device creation", HL_STR_MAX);
hdev->major = hl_major; hdev->major = hl_major;
hdev->reset_on_lockup = reset_on_lockup; hdev->reset_on_lockup = reset_on_lockup;
...@@ -532,7 +535,7 @@ hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) ...@@ -532,7 +535,7 @@ hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state)
result = PCI_ERS_RESULT_NONE; result = PCI_ERS_RESULT_NONE;
} }
hdev->asic_funcs->halt_engines(hdev, true); hdev->asic_funcs->halt_engines(hdev, true, false);
return result; return result;
} }
......
...@@ -94,6 +94,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) ...@@ -94,6 +94,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
hw_ip.first_available_interrupt_id = hw_ip.first_available_interrupt_id =
prop->first_available_user_msix_interrupt; prop->first_available_user_msix_interrupt;
hw_ip.server_type = prop->server_type;
return copy_to_user(out, &hw_ip, return copy_to_user(out, &hw_ip,
min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0; min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0;
} }
......
This diff is collapsed.
This diff is collapsed.
...@@ -470,13 +470,13 @@ static void hl_mmu_v1_fini(struct hl_device *hdev) ...@@ -470,13 +470,13 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) { if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) {
kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0); kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
}
/* Make sure that if we arrive here again without init was called we /* Make sure that if we arrive here again without init was
* won't cause kernel panic. This can happen for example if we fail * called we won't cause kernel panic. This can happen for
* during hard reset code at certain points * example if we fail during hard reset code at certain points
*/ */
hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL; hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
}
} }
/** /**
......
...@@ -436,6 +436,8 @@ int hl_pci_init(struct hl_device *hdev) ...@@ -436,6 +436,8 @@ int hl_pci_init(struct hl_device *hdev)
goto unmap_pci_bars; goto unmap_pci_bars;
} }
dma_set_max_seg_size(&pdev->dev, U32_MAX);
return 0; return 0;
unmap_pci_bars: unmap_pci_bars:
......
This diff is collapsed.
...@@ -9,8 +9,7 @@ ...@@ -9,8 +9,7 @@
#include <linux/pci.h> #include <linux/pci.h>
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
bool curr)
{ {
struct cpucp_packet pkt; struct cpucp_packet pkt;
u32 used_pll_idx; u32 used_pll_idx;
...@@ -44,8 +43,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, ...@@ -44,8 +43,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
return (long) result; return (long) result;
} }
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
u64 freq)
{ {
struct cpucp_packet pkt; struct cpucp_packet pkt;
u32 used_pll_idx; u32 used_pll_idx;
...@@ -285,16 +283,12 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr, ...@@ -285,16 +283,12 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr,
char *buf) char *buf)
{ {
struct hl_device *hdev = dev_get_drvdata(dev); struct hl_device *hdev = dev_get_drvdata(dev);
char *str; char str[HL_STR_MAX];
if (atomic_read(&hdev->in_reset)) strscpy(str, hdev->status[hl_device_status(hdev)], HL_STR_MAX);
str = "In reset";
else if (hdev->disabled) /* use uppercase for backward compatibility */
str = "Malfunction"; str[0] = 'A' + (str[0] - 'a');
else if (hdev->needs_reset)
str = "Needs Reset";
else
str = "Operational";
return sprintf(buf, "%s\n", str); return sprintf(buf, "%s\n", str);
} }
......
This diff is collapsed.
...@@ -36,6 +36,8 @@ ...@@ -36,6 +36,8 @@
#define NUMBER_OF_INTERRUPTS (NUMBER_OF_CMPLT_QUEUES + \ #define NUMBER_OF_INTERRUPTS (NUMBER_OF_CMPLT_QUEUES + \
NUMBER_OF_CPU_HW_QUEUES) NUMBER_OF_CPU_HW_QUEUES)
#define GAUDI_STREAM_MASTER_ARR_SIZE 8
#if (NUMBER_OF_INTERRUPTS > GAUDI_MSI_ENTRIES) #if (NUMBER_OF_INTERRUPTS > GAUDI_MSI_ENTRIES)
#error "Number of MSI interrupts must be smaller or equal to GAUDI_MSI_ENTRIES" #error "Number of MSI interrupts must be smaller or equal to GAUDI_MSI_ENTRIES"
#endif #endif
...@@ -50,6 +52,8 @@ ...@@ -50,6 +52,8 @@
#define DC_POWER_DEFAULT_PCI 60000 /* 60W */ #define DC_POWER_DEFAULT_PCI 60000 /* 60W */
#define DC_POWER_DEFAULT_PMC 60000 /* 60W */ #define DC_POWER_DEFAULT_PMC 60000 /* 60W */
#define DC_POWER_DEFAULT_PMC_SEC 97000 /* 97W */
#define GAUDI_CPU_TIMEOUT_USEC 30000000 /* 30s */ #define GAUDI_CPU_TIMEOUT_USEC 30000000 /* 30s */
#define TPC_ENABLED_MASK 0xFF #define TPC_ENABLED_MASK 0xFF
...@@ -62,7 +66,7 @@ ...@@ -62,7 +66,7 @@
#define DMA_MAX_TRANSFER_SIZE U32_MAX #define DMA_MAX_TRANSFER_SIZE U32_MAX
#define GAUDI_DEFAULT_CARD_NAME "HL2000" #define GAUDI_DEFAULT_CARD_NAME "HL205"
#define GAUDI_MAX_PENDING_CS SZ_16K #define GAUDI_MAX_PENDING_CS SZ_16K
...@@ -117,6 +121,7 @@ ...@@ -117,6 +121,7 @@
(((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511 - \ (((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511 - \
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0) + 4) >> 2) mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0) + 4) >> 2)
#define MONITOR_MAX_SOBS 8
/* DRAM Memory Map */ /* DRAM Memory Map */
...@@ -200,6 +205,18 @@ ...@@ -200,6 +205,18 @@
#define HW_CAP_TPC_MASK GENMASK(31, 24) #define HW_CAP_TPC_MASK GENMASK(31, 24)
#define HW_CAP_TPC_SHIFT 24 #define HW_CAP_TPC_SHIFT 24
#define NEXT_SYNC_OBJ_ADDR_INTERVAL \
(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 - \
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)
#define NUM_OF_MME_ENGINES 2
#define NUM_OF_MME_SUB_ENGINES 2
#define NUM_OF_TPC_ENGINES 8
#define NUM_OF_DMA_ENGINES 8
#define NUM_OF_QUEUES 5
#define NUM_OF_STREAMS 4
#define NUM_OF_FENCES 4
#define GAUDI_CPU_PCI_MSB_ADDR(addr) (((addr) & GENMASK_ULL(49, 39)) >> 39) #define GAUDI_CPU_PCI_MSB_ADDR(addr) (((addr) & GENMASK_ULL(49, 39)) >> 39)
#define GAUDI_PCI_TO_CPU_ADDR(addr) \ #define GAUDI_PCI_TO_CPU_ADDR(addr) \
do { \ do { \
......
...@@ -622,11 +622,6 @@ static int gaudi_config_etr(struct hl_device *hdev, ...@@ -622,11 +622,6 @@ static int gaudi_config_etr(struct hl_device *hdev,
return -EINVAL; return -EINVAL;
} }
gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER,
hdev->compute_ctx->asid);
gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER,
hdev->compute_ctx->asid);
msb = upper_32_bits(input->buffer_address) >> 8; msb = upper_32_bits(input->buffer_address) >> 8;
msb &= PSOC_GLOBAL_CONF_TRACE_ADDR_MSB_MASK; msb &= PSOC_GLOBAL_CONF_TRACE_ADDR_MSB_MASK;
WREG32(mmPSOC_GLOBAL_CONF_TRACE_ADDR, msb); WREG32(mmPSOC_GLOBAL_CONF_TRACE_ADDR, msb);
......
...@@ -9559,6 +9559,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) ...@@ -9559,6 +9559,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
mask |= 1U << ((mmTPC0_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC0_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC0_CFG_TPC_STALL & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_TPC_STALL & 0x7F) >> 2);
mask |= 1U << ((mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC0_CFG_RD_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
mask |= 1U << ((mmTPC0_CFG_WR_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
mask |= 1U << ((mmTPC0_CFG_MSS_CONFIG & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_MSS_CONFIG & 0x7F) >> 2);
...@@ -10013,6 +10014,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) ...@@ -10013,6 +10014,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
mask |= 1U << ((mmTPC1_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC1_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC1_CFG_TPC_STALL & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_TPC_STALL & 0x7F) >> 2);
mask |= 1U << ((mmTPC1_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC1_CFG_RD_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
mask |= 1U << ((mmTPC1_CFG_WR_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
mask |= 1U << ((mmTPC1_CFG_MSS_CONFIG & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_MSS_CONFIG & 0x7F) >> 2);
...@@ -10466,6 +10468,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) ...@@ -10466,6 +10468,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
mask |= 1U << ((mmTPC2_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC2_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC2_CFG_TPC_STALL & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_TPC_STALL & 0x7F) >> 2);
mask |= 1U << ((mmTPC2_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC2_CFG_RD_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
mask |= 1U << ((mmTPC2_CFG_WR_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
mask |= 1U << ((mmTPC2_CFG_MSS_CONFIG & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_MSS_CONFIG & 0x7F) >> 2);
...@@ -10919,6 +10922,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) ...@@ -10919,6 +10922,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
mask |= 1U << ((mmTPC3_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC3_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC3_CFG_TPC_STALL & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_TPC_STALL & 0x7F) >> 2);
mask |= 1U << ((mmTPC3_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC3_CFG_RD_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
mask |= 1U << ((mmTPC3_CFG_WR_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
mask |= 1U << ((mmTPC3_CFG_MSS_CONFIG & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_MSS_CONFIG & 0x7F) >> 2);
...@@ -11372,6 +11376,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) ...@@ -11372,6 +11376,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
mask |= 1U << ((mmTPC4_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC4_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC4_CFG_TPC_STALL & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_TPC_STALL & 0x7F) >> 2);
mask |= 1U << ((mmTPC4_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC4_CFG_RD_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
mask |= 1U << ((mmTPC4_CFG_WR_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
mask |= 1U << ((mmTPC4_CFG_MSS_CONFIG & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_MSS_CONFIG & 0x7F) >> 2);
...@@ -11825,6 +11830,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) ...@@ -11825,6 +11830,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
mask |= 1U << ((mmTPC5_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC5_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC5_CFG_TPC_STALL & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_TPC_STALL & 0x7F) >> 2);
mask |= 1U << ((mmTPC5_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC5_CFG_RD_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
mask |= 1U << ((mmTPC5_CFG_WR_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
mask |= 1U << ((mmTPC5_CFG_MSS_CONFIG & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_MSS_CONFIG & 0x7F) >> 2);
...@@ -12280,6 +12286,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) ...@@ -12280,6 +12286,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
mask |= 1U << ((mmTPC6_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC6_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC6_CFG_TPC_STALL & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_TPC_STALL & 0x7F) >> 2);
mask |= 1U << ((mmTPC6_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC6_CFG_RD_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
mask |= 1U << ((mmTPC6_CFG_WR_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
mask |= 1U << ((mmTPC6_CFG_MSS_CONFIG & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_MSS_CONFIG & 0x7F) >> 2);
...@@ -12735,6 +12742,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) ...@@ -12735,6 +12742,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
mask |= 1U << ((mmTPC7_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC7_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC7_CFG_TPC_STALL & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_TPC_STALL & 0x7F) >> 2);
mask |= 1U << ((mmTPC7_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC7_CFG_RD_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
mask |= 1U << ((mmTPC7_CFG_WR_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
mask |= 1U << ((mmTPC7_CFG_MSS_CONFIG & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_MSS_CONFIG & 0x7F) >> 2);
......
...@@ -350,6 +350,8 @@ static u32 goya_all_events[] = { ...@@ -350,6 +350,8 @@ static u32 goya_all_events[] = {
GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
}; };
static s64 goya_state_dump_specs_props[SP_MAX] = {0};
static int goya_mmu_clear_pgt_range(struct hl_device *hdev); static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev); static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev); static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
...@@ -387,6 +389,7 @@ int goya_set_fixed_properties(struct hl_device *hdev) ...@@ -387,6 +389,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER; prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
} }
prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
prop->dram_base_address = DRAM_PHYS_BASE; prop->dram_base_address = DRAM_PHYS_BASE;
...@@ -466,6 +469,8 @@ int goya_set_fixed_properties(struct hl_device *hdev) ...@@ -466,6 +469,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->hard_reset_done_by_fw = false; prop->hard_reset_done_by_fw = false;
prop->gic_interrupts_enable = true; prop->gic_interrupts_enable = true;
prop->server_type = HL_SERVER_TYPE_UNKNOWN;
return 0; return 0;
} }
...@@ -649,14 +654,14 @@ static int goya_early_init(struct hl_device *hdev) ...@@ -649,14 +654,14 @@ static int goya_early_init(struct hl_device *hdev)
GOYA_BOOT_FIT_REQ_TIMEOUT_USEC); GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
if (rc) { if (rc) {
if (hdev->reset_on_preboot_fail) if (hdev->reset_on_preboot_fail)
hdev->asic_funcs->hw_fini(hdev, true); hdev->asic_funcs->hw_fini(hdev, true, false);
goto pci_fini; goto pci_fini;
} }
if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_info(hdev->dev, dev_info(hdev->dev,
"H/W state is dirty, must reset before initializing\n"); "H/W state is dirty, must reset before initializing\n");
hdev->asic_funcs->hw_fini(hdev, true); hdev->asic_funcs->hw_fini(hdev, true, false);
} }
if (!hdev->pldm) { if (!hdev->pldm) {
...@@ -955,8 +960,9 @@ static int goya_sw_init(struct hl_device *hdev) ...@@ -955,8 +960,9 @@ static int goya_sw_init(struct hl_device *hdev)
hdev->supports_coresight = true; hdev->supports_coresight = true;
hdev->supports_soft_reset = true; hdev->supports_soft_reset = true;
hdev->allow_external_soft_reset = true; hdev->allow_external_soft_reset = true;
hdev->supports_wait_for_multi_cs = false;
goya_set_pci_memory_regions(hdev); hdev->asic_funcs->set_pci_memory_regions(hdev);
return 0; return 0;
...@@ -2374,7 +2380,7 @@ static void goya_disable_timestamp(struct hl_device *hdev) ...@@ -2374,7 +2380,7 @@ static void goya_disable_timestamp(struct hl_device *hdev)
WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
} }
static void goya_halt_engines(struct hl_device *hdev, bool hard_reset) static void goya_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{ {
u32 wait_timeout_ms; u32 wait_timeout_ms;
...@@ -2493,6 +2499,7 @@ static void goya_init_firmware_loader(struct hl_device *hdev) ...@@ -2493,6 +2499,7 @@ static void goya_init_firmware_loader(struct hl_device *hdev)
struct fw_load_mgr *fw_loader = &hdev->fw_loader; struct fw_load_mgr *fw_loader = &hdev->fw_loader;
/* fill common fields */ /* fill common fields */
fw_loader->linux_loaded = false;
fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE; fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE;
fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE; fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE;
fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC; fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC;
...@@ -2696,14 +2703,7 @@ static int goya_hw_init(struct hl_device *hdev) ...@@ -2696,14 +2703,7 @@ static int goya_hw_init(struct hl_device *hdev)
return rc; return rc;
} }
/* static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
* goya_hw_fini - Goya hardware tear-down code
*
* @hdev: pointer to hl_device structure
* @hard_reset: should we do hard reset to all engines or just reset the
* compute/dma engines
*/
static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
{ {
struct goya_device *goya = hdev->asic_specific; struct goya_device *goya = hdev->asic_specific;
u32 reset_timeout_ms, cpu_timeout_ms, status; u32 reset_timeout_ms, cpu_timeout_ms, status;
...@@ -2796,7 +2796,7 @@ int goya_resume(struct hl_device *hdev) ...@@ -2796,7 +2796,7 @@ int goya_resume(struct hl_device *hdev)
return goya_init_iatu(hdev); return goya_init_iatu(hdev);
} }
static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, static int goya_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size) void *cpu_addr, dma_addr_t dma_addr, size_t size)
{ {
int rc; int rc;
...@@ -4797,6 +4797,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) ...@@ -4797,6 +4797,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
>> EQ_CTL_EVENT_TYPE_SHIFT); >> EQ_CTL_EVENT_TYPE_SHIFT);
struct goya_device *goya = hdev->asic_specific; struct goya_device *goya = hdev->asic_specific;
if (event_type >= GOYA_ASYNC_EVENT_ID_SIZE) {
dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
event_type, GOYA_ASYNC_EVENT_ID_SIZE - 1);
return;
}
goya->events_stat[event_type]++; goya->events_stat[event_type]++;
goya->events_stat_aggregate[event_type]++; goya->events_stat_aggregate[event_type]++;
...@@ -5475,14 +5481,14 @@ u64 goya_get_device_time(struct hl_device *hdev) ...@@ -5475,14 +5481,14 @@ u64 goya_get_device_time(struct hl_device *hdev)
return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL); return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
} }
static void goya_collective_wait_init_cs(struct hl_cs *cs) static int goya_collective_wait_init_cs(struct hl_cs *cs)
{ {
return 0;
} }
static int goya_collective_wait_create_jobs(struct hl_device *hdev, static int goya_collective_wait_create_jobs(struct hl_device *hdev,
struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id, struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
u32 collective_engine_id) u32 collective_engine_id, u32 encaps_signal_offset)
{ {
return -EINVAL; return -EINVAL;
} }
...@@ -5524,6 +5530,62 @@ static int goya_map_pll_idx_to_fw_idx(u32 pll_idx) ...@@ -5524,6 +5530,62 @@ static int goya_map_pll_idx_to_fw_idx(u32 pll_idx)
} }
} }
/*
 * goya_gen_sync_to_engine_map - state-dump callback stub for Goya
 *
 * @hdev: pointer to hl_device structure (unused)
 * @map: sync-to-engine map (unused, left untouched)
 *
 * Not implemented for Goya. Always returns 0.
 */
static int goya_gen_sync_to_engine_map(struct hl_device *hdev,
struct hl_sync_to_engine_map *map)
{
/* Not implemented */
return 0;
}
/*
 * goya_monitor_valid - state-dump callback stub for Goya
 *
 * @mon: monitor state-dump descriptor (unused)
 *
 * Not implemented for Goya. Always returns 0, regardless of @mon.
 */
static int goya_monitor_valid(struct hl_mon_state_dump *mon)
{
/* Not implemented */
return 0;
}
/*
 * goya_print_single_monitor - state-dump callback stub for Goya
 *
 * @buf: output buffer pointer (unused, left untouched)
 * @size: output buffer size (unused, left untouched)
 * @offset: output buffer offset (unused, left untouched)
 * @hdev: pointer to hl_device structure (unused)
 * @mon: monitor state-dump descriptor (unused)
 *
 * Not implemented for Goya. Nothing is printed; always returns 0.
 */
static int goya_print_single_monitor(char **buf, size_t *size, size_t *offset,
struct hl_device *hdev,
struct hl_mon_state_dump *mon)
{
/* Not implemented */
return 0;
}
/*
 * goya_print_fences_single_engine - state-dump callback stub for Goya
 *
 * @hdev: pointer to hl_device structure (unused)
 * @base_offset: unused
 * @status_base_offset: unused
 * @engine_type: unused
 * @engine_id: unused
 * @buf: output buffer pointer (unused, left untouched)
 * @size: output buffer size (unused, left untouched)
 * @offset: output buffer offset (unused, left untouched)
 *
 * Not implemented for Goya. Nothing is printed; always returns 0.
 */
static int goya_print_fences_single_engine(
struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
size_t *size, size_t *offset)
{
/* Not implemented */
return 0;
}
/*
 * State-dump callback table for Goya. Every entry points at a stub that
 * does nothing and returns 0. goya_state_dump_init() assigns this table to
 * hdev->state_dump_specs.funcs.
 */
static struct hl_state_dump_specs_funcs goya_state_dump_funcs = {
.monitor_valid = goya_monitor_valid,
.print_single_monitor = goya_print_single_monitor,
.gen_sync_to_engine_map = goya_gen_sync_to_engine_map,
.print_fences_single_engine = goya_print_fences_single_engine,
};
/*
 * goya_state_dump_init - install the Goya state-dump placeholders
 *
 * @hdev: pointer to hl_device structure
 *
 * Points hdev->state_dump_specs.props at the all-zero
 * goya_state_dump_specs_props table and assigns the stub callback table
 * goya_state_dump_funcs to hdev->state_dump_specs.funcs.
 */
static void goya_state_dump_init(struct hl_device *hdev)
{
/* State dump itself is not implemented; only the placeholders are set */
hdev->state_dump_specs.props = goya_state_dump_specs_props;
hdev->state_dump_specs.funcs = goya_state_dump_funcs;
}
/*
 * goya_get_sob_addr - get_sob_addr callback stub for Goya
 *
 * @hdev: pointer to hl_device structure (unused)
 * @sob_id: unused
 *
 * Always returns 0, regardless of @sob_id.
 */
static u32 goya_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
return 0;
}
/*
 * goya_get_stream_master_qid_arr - get_stream_master_qid_arr callback stub
 *
 * Always returns NULL.
 */
static u32 *goya_get_stream_master_qid_arr(void)
{
return NULL;
}
static const struct hl_asic_funcs goya_funcs = { static const struct hl_asic_funcs goya_funcs = {
.early_init = goya_early_init, .early_init = goya_early_init,
.early_fini = goya_early_fini, .early_fini = goya_early_fini,
...@@ -5536,7 +5598,7 @@ static const struct hl_asic_funcs goya_funcs = { ...@@ -5536,7 +5598,7 @@ static const struct hl_asic_funcs goya_funcs = {
.halt_engines = goya_halt_engines, .halt_engines = goya_halt_engines,
.suspend = goya_suspend, .suspend = goya_suspend,
.resume = goya_resume, .resume = goya_resume,
.cb_mmap = goya_cb_mmap, .mmap = goya_mmap,
.ring_doorbell = goya_ring_doorbell, .ring_doorbell = goya_ring_doorbell,
.pqe_write = goya_pqe_write, .pqe_write = goya_pqe_write,
.asic_dma_alloc_coherent = goya_dma_alloc_coherent, .asic_dma_alloc_coherent = goya_dma_alloc_coherent,
...@@ -5609,7 +5671,11 @@ static const struct hl_asic_funcs goya_funcs = { ...@@ -5609,7 +5671,11 @@ static const struct hl_asic_funcs goya_funcs = {
.enable_events_from_fw = goya_enable_events_from_fw, .enable_events_from_fw = goya_enable_events_from_fw,
.map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx, .map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx,
.init_firmware_loader = goya_init_firmware_loader, .init_firmware_loader = goya_init_firmware_loader,
.init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram .init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram,
.state_dump_init = goya_state_dump_init,
.get_sob_addr = &goya_get_sob_addr,
.set_pci_memory_regions = goya_set_pci_memory_regions,
.get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
}; };
/* /*
......
...@@ -98,6 +98,18 @@ struct hl_eq_fw_alive { ...@@ -98,6 +98,18 @@ struct hl_eq_fw_alive {
__u8 pad[7]; __u8 pad[7];
}; };
/*
 * enum hl_pcie_addr_dec_cause - cause codes stored in
 * struct hl_eq_pcie_addr_dec_data.addr_dec_cause.
 *
 * Values are implicit (0, 1, 2); the order must not change.
 * NOTE(review): the names suggest HBW/LBW error responses and a TLP blocked
 * by range registers - confirm the exact meaning against the f/w spec.
 */
enum hl_pcie_addr_dec_cause {
PCIE_ADDR_DEC_HBW_ERR_RESP,
PCIE_ADDR_DEC_LBW_ERR_RESP,
PCIE_ADDR_DEC_TLP_BLOCKED_BY_RR
};
/*
 * struct hl_eq_pcie_addr_dec_data - event queue payload for a PCIe address
 * decode event; a member of the data union in struct hl_eq_entry.
 * @addr_dec_cause: one of enum hl_pcie_addr_dec_cause, stored in one byte.
 * @pad: padding that brings the structure to 8 bytes.
 */
struct hl_eq_pcie_addr_dec_data {
/* enum hl_pcie_addr_dec_cause */
__u8 addr_dec_cause;
__u8 pad[7];
};
struct hl_eq_entry { struct hl_eq_entry {
struct hl_eq_header hdr; struct hl_eq_header hdr;
union { union {
...@@ -106,6 +118,7 @@ struct hl_eq_entry { ...@@ -106,6 +118,7 @@ struct hl_eq_entry {
struct hl_eq_sm_sei_data sm_sei_data; struct hl_eq_sm_sei_data sm_sei_data;
struct cpucp_pkt_sync_err pkt_sync_err; struct cpucp_pkt_sync_err pkt_sync_err;
struct hl_eq_fw_alive fw_alive; struct hl_eq_fw_alive fw_alive;
struct hl_eq_pcie_addr_dec_data pcie_addr_dec_data;
__le64 data[7]; __le64 data[7];
}; };
}; };
...@@ -116,7 +129,7 @@ struct hl_eq_entry { ...@@ -116,7 +129,7 @@ struct hl_eq_entry {
#define EQ_CTL_READY_MASK 0x80000000 #define EQ_CTL_READY_MASK 0x80000000
#define EQ_CTL_EVENT_TYPE_SHIFT 16 #define EQ_CTL_EVENT_TYPE_SHIFT 16
#define EQ_CTL_EVENT_TYPE_MASK 0x03FF0000 #define EQ_CTL_EVENT_TYPE_MASK 0x0FFF0000
#define EQ_CTL_INDEX_SHIFT 0 #define EQ_CTL_INDEX_SHIFT 0
#define EQ_CTL_INDEX_MASK 0x0000FFFF #define EQ_CTL_INDEX_MASK 0x0000FFFF
...@@ -300,7 +313,7 @@ enum pq_init_status { ...@@ -300,7 +313,7 @@ enum pq_init_status {
* The packet's arguments specify the desired sensor and the field to * The packet's arguments specify the desired sensor and the field to
* set. * set.
* *
* CPUCP_PACKET_PCIE_THROUGHPUT_GET * CPUCP_PACKET_PCIE_THROUGHPUT_GET -
* Get throughput of PCIe. * Get throughput of PCIe.
* The packet's arguments specify the transaction direction (TX/RX). * The packet's arguments specify the transaction direction (TX/RX).
* The window measurement is 10[msec], and the return value is in KB/sec. * The window measurement is 10[msec], and the return value is in KB/sec.
...@@ -309,19 +322,19 @@ enum pq_init_status { ...@@ -309,19 +322,19 @@ enum pq_init_status {
* Replay count measures number of "replay" events, which is basically * Replay count measures number of "replay" events, which is basically
* number of retries done by PCIe. * number of retries done by PCIe.
* *
* CPUCP_PACKET_TOTAL_ENERGY_GET * CPUCP_PACKET_TOTAL_ENERGY_GET -
* Total Energy is measurement of energy from the time FW Linux * Total Energy is measurement of energy from the time FW Linux
* is loaded. It is calculated by multiplying the average power * is loaded. It is calculated by multiplying the average power
* by time (passed from armcp start). The units are in millijoules. * by time (passed from armcp start). The units are in millijoules.
* *
* CPUCP_PACKET_PLL_INFO_GET * CPUCP_PACKET_PLL_INFO_GET -
* Fetch frequencies of PLL from the required PLL IP. * Fetch frequencies of PLL from the required PLL IP.
* The packet's arguments specify the device PLL type * The packet's arguments specify the device PLL type
* Pll type is the PLL from device pll_index enum. * Pll type is the PLL from device pll_index enum.
* The result is composed of 4 outputs, each is 16-bit * The result is composed of 4 outputs, each is 16-bit
* frequency in MHz. * frequency in MHz.
* *
* CPUCP_PACKET_POWER_GET * CPUCP_PACKET_POWER_GET -
* Fetch the present power consumption of the device (Current * Voltage). * Fetch the present power consumption of the device (Current * Voltage).
* *
* CPUCP_PACKET_NIC_PFC_SET - * CPUCP_PACKET_NIC_PFC_SET -
...@@ -345,6 +358,24 @@ enum pq_init_status { ...@@ -345,6 +358,24 @@ enum pq_init_status {
* CPUCP_PACKET_MSI_INFO_SET - * CPUCP_PACKET_MSI_INFO_SET -
* set the index number for each supported msi type going from * set the index number for each supported msi type going from
* host to device * host to device
*
* CPUCP_PACKET_NIC_XPCS91_REGS_GET -
* Fetch the un/correctable counters values from the NIC MAC.
*
* CPUCP_PACKET_NIC_STAT_REGS_GET -
* Fetch various NIC MAC counters from the NIC STAT.
*
* CPUCP_PACKET_NIC_STAT_REGS_CLR -
* Clear the various NIC MAC counters in the NIC STAT.
*
* CPUCP_PACKET_NIC_STAT_REGS_ALL_GET -
* Fetch all NIC MAC counters from the NIC STAT.
*
* CPUCP_PACKET_IS_IDLE_CHECK -
* Check if the device is IDLE in regard to the DMA/compute engines
* and QMANs. The f/w will return a bitmask where each bit represents
* a different engine or QMAN according to enum cpucp_idle_mask.
* The bit will be 1 if the engine is NOT idle.
*/ */
enum cpucp_packet_id { enum cpucp_packet_id {
...@@ -385,6 +416,11 @@ enum cpucp_packet_id { ...@@ -385,6 +416,11 @@ enum cpucp_packet_id {
CPUCP_PACKET_NIC_LPBK_SET, /* internal */ CPUCP_PACKET_NIC_LPBK_SET, /* internal */
CPUCP_PACKET_NIC_MAC_CFG, /* internal */ CPUCP_PACKET_NIC_MAC_CFG, /* internal */
CPUCP_PACKET_MSI_INFO_SET, /* internal */ CPUCP_PACKET_MSI_INFO_SET, /* internal */
CPUCP_PACKET_NIC_XPCS91_REGS_GET, /* internal */
CPUCP_PACKET_NIC_STAT_REGS_GET, /* internal */
CPUCP_PACKET_NIC_STAT_REGS_CLR, /* internal */
CPUCP_PACKET_NIC_STAT_REGS_ALL_GET, /* internal */
CPUCP_PACKET_IS_IDLE_CHECK, /* internal */
}; };
#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 #define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5
...@@ -414,6 +450,11 @@ enum cpucp_packet_id { ...@@ -414,6 +450,11 @@ enum cpucp_packet_id {
#define CPUCP_PKT_VAL_LPBK_IN2_SHIFT 1 #define CPUCP_PKT_VAL_LPBK_IN2_SHIFT 1
#define CPUCP_PKT_VAL_LPBK_IN2_MASK 0x000000000000001Eull #define CPUCP_PKT_VAL_LPBK_IN2_MASK 0x000000000000001Eull
#define CPUCP_PKT_VAL_MAC_CNT_IN1_SHIFT 0
#define CPUCP_PKT_VAL_MAC_CNT_IN1_MASK 0x0000000000000001ull
#define CPUCP_PKT_VAL_MAC_CNT_IN2_SHIFT 1
#define CPUCP_PKT_VAL_MAC_CNT_IN2_MASK 0x00000000FFFFFFFEull
/* heartbeat status bits */ /* heartbeat status bits */
#define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT 0 #define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT 0
#define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK 0x00000001 #define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK 0x00000001
...@@ -467,7 +508,8 @@ struct cpucp_packet { ...@@ -467,7 +508,8 @@ struct cpucp_packet {
__le32 status_mask; __le32 status_mask;
}; };
__le32 reserved; /* For NIC requests */
__le32 port_index;
}; };
struct cpucp_unmask_irq_arr_packet { struct cpucp_unmask_irq_arr_packet {
...@@ -476,6 +518,12 @@ struct cpucp_unmask_irq_arr_packet { ...@@ -476,6 +518,12 @@ struct cpucp_unmask_irq_arr_packet {
__le32 irqs[0]; __le32 irqs[0];
}; };
/*
 * struct cpucp_nic_status_packet - CPUCP packet followed by a variable-length
 *                                  array of 32-bit little-endian words.
 * @cpucp_pkt: common CPUCP packet.
 * @length: NOTE(review): presumably the amount of data in @data - confirm the
 *          unit (entries or bytes) against the firmware interface.
 * @data: trailing payload. Declared as a C99 flexible array member rather
 *        than the deprecated zero-length array, so the compiler can flag
 *        misuse (e.g. sizeof on the array, or members placed after it).
 *        sizeof(struct cpucp_nic_status_packet) is unchanged.
 */
struct cpucp_nic_status_packet {
	struct cpucp_packet cpucp_pkt;
	__le32 length;
	__le32 data[];
};
struct cpucp_array_data_packet { struct cpucp_array_data_packet {
struct cpucp_packet cpucp_pkt; struct cpucp_packet cpucp_pkt;
__le32 length; __le32 length;
...@@ -595,6 +643,18 @@ enum pll_index { ...@@ -595,6 +643,18 @@ enum pll_index {
PLL_MAX PLL_MAX
}; };
/*
 * enum rl_index - index values shared with the firmware; TPC_RL is 0 and
 * MME_RL is 1.
 * NOTE(review): "RL" presumably stands for rate limiter - confirm.
 */
enum rl_index {
TPC_RL = 0,
MME_RL,
};
/*
 * enum pvt_index - index values shared with the firmware, implicitly
 * numbered 0..3 in declaration order.
 * NOTE(review): the suffixes look like die corners (SW/SE/NW/NE) of PVT
 * sensors - confirm against the firmware interface.
 */
enum pvt_index {
PVT_SW,
PVT_SE,
PVT_NW,
PVT_NE
};
/* Event Queue Packets */ /* Event Queue Packets */
struct eq_generic_event { struct eq_generic_event {
...@@ -700,6 +760,15 @@ struct cpucp_mac_addr { ...@@ -700,6 +760,15 @@ struct cpucp_mac_addr {
__u8 mac_addr[ETH_ALEN]; __u8 mac_addr[ETH_ALEN];
}; };
/*
 * enum cpucp_serdes_type - SerDes type values carried in
 * struct cpucp_nic_info.serdes_type.
 *
 * Values are implicit, starting at 0. MAX_NUM_SERDES_TYPE is an alias of
 * UNKNOWN_SERDES_TYPE, so it equals the number of known types.
 */
enum cpucp_serdes_type {
TYPE_1_SERDES_TYPE,
TYPE_2_SERDES_TYPE,
HLS1_SERDES_TYPE,
HLS1H_SERDES_TYPE,
UNKNOWN_SERDES_TYPE,
MAX_NUM_SERDES_TYPE = UNKNOWN_SERDES_TYPE
};
struct cpucp_nic_info { struct cpucp_nic_info {
struct cpucp_mac_addr mac_addrs[CPUCP_MAX_NICS]; struct cpucp_mac_addr mac_addrs[CPUCP_MAX_NICS];
__le64 link_mask[CPUCP_NIC_MASK_ARR_LEN]; __le64 link_mask[CPUCP_NIC_MASK_ARR_LEN];
...@@ -708,6 +777,40 @@ struct cpucp_nic_info { ...@@ -708,6 +777,40 @@ struct cpucp_nic_info {
__le64 link_ext_mask[CPUCP_NIC_MASK_ARR_LEN]; __le64 link_ext_mask[CPUCP_NIC_MASK_ARR_LEN];
__u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN]; __u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN];
__le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN]; __le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN];
__le16 serdes_type; /* enum cpucp_serdes_type */
__u8 reserved[6];
};
/*
* struct cpucp_nic_status - describes the status of a NIC port.
* @port: NIC port index.
* @bad_format_cnt: e.g. CRC.
* @responder_out_of_sequence_psn_cnt: e.g NAK.
* @high_ber_reinit: link reinit due to high BER.
* @correctable_err_cnt: e.g. bit-flip.
* @uncorrectable_err_cnt: e.g. MAC errors.
* @retraining_cnt: re-training counter.
* @up: is port up.
* @pcs_link: has PCS link.
* @phy_ready: is PHY ready.
* @auto_neg: is Autoneg enabled.
* @timeout_retransmission_cnt: timeout retransmission events
* @high_ber_cnt: high ber events
*/
struct cpucp_nic_status {
__le32 port;
__le32 bad_format_cnt;
__le32 responder_out_of_sequence_psn_cnt;
__le32 high_ber_reinit;
__le32 correctable_err_cnt;
__le32 uncorrectable_err_cnt;
__le32 retraining_cnt;
__u8 up;
__u8 pcs_link;
__u8 phy_ready;
__u8 auto_neg;
__le32 timeout_retransmission_cnt;
__le32 high_ber_cnt;
}; };
#endif /* CPUCP_IF_H */ #endif /* CPUCP_IF_H */
...@@ -126,6 +126,9 @@ ...@@ -126,6 +126,9 @@
#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 0x4F2004 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 0x4F2004
#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047 0x4F3FFC #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047 0x4F3FFC
#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 0x4F4000 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 0x4F4000
#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 0x4F4800
#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0 0x4F5000
#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0 0x4F5800
#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 0x4F6000 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 0x4F6000
#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511 0x4F67FC #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511 0x4F67FC
......
...@@ -449,4 +449,21 @@ enum axi_id { ...@@ -449,4 +449,21 @@ enum axi_id {
#define PCIE_AUX_FLR_CTRL_HW_CTRL_MASK 0x1 #define PCIE_AUX_FLR_CTRL_HW_CTRL_MASK 0x1
#define PCIE_AUX_FLR_CTRL_INT_MASK_MASK 0x2 #define PCIE_AUX_FLR_CTRL_INT_MASK_MASK 0x2
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_SHIFT 0
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK 0x1
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_SHIFT 1
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK 0x1FE
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_SHIFT 0
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK 0xFF
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_SHIFT 8
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK 0xFF00
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOP_SHIFT 16
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOP_MASK 0x10000
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_SHIFT 17
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK 0xFFFE0000
#define TPC0_QM_CP_STS_0_FENCE_ID_SHIFT 20
#define TPC0_QM_CP_STS_0_FENCE_ID_MASK 0x300000
#define TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_SHIFT 22
#define TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK 0x400000
#endif /* GAUDI_MASKS_H_ */ #endif /* GAUDI_MASKS_H_ */
...@@ -12,8 +12,6 @@ ...@@ -12,8 +12,6 @@
* PSOC scratch-pad registers * PSOC scratch-pad registers
*/ */
#define mmHW_STATE mmPSOC_GLOBAL_CONF_SCRATCHPAD_0 #define mmHW_STATE mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
/* TODO: remove mmGIC_HOST_IRQ_CTRL_POLL_REG */
#define mmGIC_HOST_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
#define mmGIC_HOST_PI_UPD_IRQ_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_1 #define mmGIC_HOST_PI_UPD_IRQ_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
#define mmGIC_TPC_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_2 #define mmGIC_TPC_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
#define mmGIC_MME_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_3 #define mmGIC_MME_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_3
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment