Commit 9d20328d authored by Greg Kroah-Hartman

Merge tag 'misc-habanalabs-next-2020-03-24' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next

Oded writes:

This tag contains the following changes for kernel 5.7:

- MMU code improvements that include:
  - Flush the MMU TLB cache only once, at the end of the mapping/unmapping
    function, instead of after mapping every page.
  - Prepare for future ASIC support by splitting the ASIC properties that
    describe mapping of host memory into regular-page and huge-page
    properties.
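
  As the diff further down shows, hl_mmu_map() and hl_mmu_unmap() now take an
  extra flush_pte flag so a caller can defer the MMU cache flush until the
  last page of a region. A minimal caller sketch of that pattern, assuming the
  driver's internal hl_mmu_map() prototype and PAGE_SIZE_2MB macro are in
  scope; locking and unmap-on-error rollback are omitted:

```c
/* Sketch only: map a region in 2MB pages and request the PTE/TLB flush
 * only when mapping the last page, instead of after every page.
 */
static int map_region_2mb(struct hl_ctx *ctx, u64 va, u64 pa, u64 size)
{
	u64 off;
	int rc;

	for (off = 0 ; off < size ; off += PAGE_SIZE_2MB) {
		/* flush only once, on the final page of the region */
		bool flush_pte = (off + PAGE_SIZE_2MB) >= size;

		rc = hl_mmu_map(ctx, va + off, pa + off, PAGE_SIZE_2MB,
				flush_pte);
		if (rc)
			return rc;
	}

	return 0;
}
```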

- Add a debugfs interface to read and write 64-bit values from/to the device's
  memory and registers. Previously the driver only provided a 32-bit
  interface, so this lets the user debug much more quickly. We measured a
  speed-up of roughly 1.5x-1.7x when reading internal memories.
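
  A quick user-space sketch of exercising the new entry. This is hedged: it
  assumes the target address is first selected through the driver's existing
  "addr" debugfs entry, as with the 32-bit interface, and the hl0 path and the
  address value are illustrative, not taken from this patch set:

```c
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *addr_path = "/sys/kernel/debug/habanalabs/hl0/addr";
	const char *data_path = "/sys/kernel/debug/habanalabs/hl0/data64";
	const char *target = "0x7ffc8000";	/* illustrative device address */
	char buf[32] = {0};
	ssize_t n;
	int fd;

	/* select the device address to access */
	fd = open(addr_path, O_WRONLY);
	if (fd < 0 || write(fd, target, strlen(target)) < 0) {
		perror("addr");
		return 1;
	}
	close(fd);

	/* read 64 bits back; the driver formats the value as "0x%016llx\n" */
	fd = open(data_path, O_RDONLY);
	if (fd < 0) {
		perror("data64");
		return 1;
	}
	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n < 0) {
		perror("read");
		return 1;
	}

	printf("value: %s", buf);
	return 0;
}
```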

- Support a temperature offset via sysfs, as defined in
  https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface
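
  The offset is exposed through the standard hwmon temp<N>_offset attribute in
  millidegrees Celsius. A small sketch of setting it from user space; the
  hwmon index and sensor number vary per system and are assumptions here:

```c
#include <stdio.h>

int main(void)
{
	/* apply a +2 degC offset to sensor 1 (value is in millidegrees) */
	FILE *f = fopen("/sys/class/hwmon/hwmon1/temp1_offset", "w");

	if (!f) {
		perror("temp1_offset");
		return 1;
	}

	fprintf(f, "%d\n", 2000);
	fclose(f);
	return 0;
}
```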

- Display the historical maximum values of various sensors.

- Print to the kernel log when clock throttling occurs due to a breach of the
  power or thermal envelope. Also print when clock throttling is finished
  (the clock is back to optimal).

- Fix a bug when moving from manual to auto power-management mode.

- Print an "unsupported device" message to the kernel log in case a GAUDI
  device is recognized.

- Small bug fixes and minor code improvements.

* tag 'misc-habanalabs-next-2020-03-24' of git://people.freedesktop.org/~gabbayo/linux:
  habanalabs: fix pm manual->auto in GOYA
  habanalabs: show unsupported message for GAUDI
  habanalabs: add print upon clock change
  habanalabs: update goya firmware register map
  habanalabs: Add missing annotation for goya_hw_queues_unlock()
  habanalabs: Add missing annotation for goya_hw_queues_lock()
  habanalabs: Remove unused parse_cnt variable
  habanalabs: provide historical maximum of various sensors
  habanalabs: modify the return values of hl_read/write routines
  habanalabs: support temperature offset via sysfs
  habanalabs: ratelimit error prints of IRQs
  habanalabs: add debugfs write64/read64
  habanalabs: fix DDR bar address setting
  habanalabs: removing extra ;
  habanalabs: Avoid running restore chunks if no execute chunks
  habanalabs: Modify CS jobs counter to u16
  habanalabs: split the host MMU properties
  habanalabs: use the user CB size as a default job size
  habanalabs: flush only at the end of the map/unmap
parents bbde5709 11845501
...@@ -43,6 +43,20 @@ Description: Allows the root user to read or write directly through the ...@@ -43,6 +43,20 @@ Description: Allows the root user to read or write directly through the
If the IOMMU is disabled, it also allows the root user to read If the IOMMU is disabled, it also allows the root user to read
or write from the host a device VA of a host mapped memory or write from the host a device VA of a host mapped memory
What: /sys/kernel/debug/habanalabs/hl<n>/data64
Date: Jan 2020
KernelVersion: 5.6
Contact: oded.gabbay@gmail.com
Description: Allows the root user to read or write 64 bit data directly
through the device's PCI bar. Writing to this file generates a
write transaction while reading from the file generates a read
transaction. This custom interface is needed (instead of using
the generic Linux user-space PCI mapping) because the DDR bar
is very small compared to the DDR memory and only the driver can
move the bar before and after the transaction.
If the IOMMU is disabled, it also allows the root user to read
or write from the host a device VA of a host mapped memory
What: /sys/kernel/debug/habanalabs/hl<n>/device What: /sys/kernel/debug/habanalabs/hl<n>/device
Date: Jan 2019 Date: Jan 2019
KernelVersion: 5.1 KernelVersion: 5.1
......
...@@ -129,6 +129,8 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) ...@@ -129,6 +129,8 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
spin_unlock(&job->user_cb->lock); spin_unlock(&job->user_cb->lock);
hl_cb_put(job->user_cb); hl_cb_put(job->user_cb);
job->user_cb = NULL; job->user_cb = NULL;
} else if (!rc) {
job->job_cb_size = job->user_cb_size;
} }
return rc; return rc;
...@@ -507,7 +509,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, ...@@ -507,7 +509,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
struct hl_cb *cb; struct hl_cb *cb;
bool int_queues_only = true; bool int_queues_only = true;
u32 size_to_copy; u32 size_to_copy;
int rc, i, parse_cnt; int rc, i;
*cs_seq = ULLONG_MAX; *cs_seq = ULLONG_MAX;
...@@ -547,7 +549,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, ...@@ -547,7 +549,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
hl_debugfs_add_cs(cs); hl_debugfs_add_cs(cs);
/* Validate ALL the CS chunks before submitting the CS */ /* Validate ALL the CS chunks before submitting the CS */
for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) { for (i = 0 ; i < num_chunks ; i++) {
struct hl_cs_chunk *chunk = &cs_chunk_array[i]; struct hl_cs_chunk *chunk = &cs_chunk_array[i];
enum hl_queue_type queue_type; enum hl_queue_type queue_type;
bool is_kernel_allocated_cb; bool is_kernel_allocated_cb;
...@@ -585,10 +587,6 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, ...@@ -585,10 +587,6 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
job->cs = cs; job->cs = cs;
job->user_cb = cb; job->user_cb = cb;
job->user_cb_size = chunk->cb_size; job->user_cb_size = chunk->cb_size;
if (is_kernel_allocated_cb)
job->job_cb_size = cb->size;
else
job->job_cb_size = chunk->cb_size;
job->hw_queue_id = chunk->queue_index; job->hw_queue_id = chunk->queue_index;
cs->jobs_in_queue_cnt[job->hw_queue_id]++; cs->jobs_in_queue_cnt[job->hw_queue_id]++;
...@@ -659,8 +657,8 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -659,8 +657,8 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
struct hl_device *hdev = hpriv->hdev; struct hl_device *hdev = hpriv->hdev;
union hl_cs_args *args = data; union hl_cs_args *args = data;
struct hl_ctx *ctx = hpriv->ctx; struct hl_ctx *ctx = hpriv->ctx;
void __user *chunks; void __user *chunks_execute, *chunks_restore;
u32 num_chunks; u32 num_chunks_execute, num_chunks_restore;
u64 cs_seq = ULONG_MAX; u64 cs_seq = ULONG_MAX;
int rc, do_ctx_switch; int rc, do_ctx_switch;
bool need_soft_reset = false; bool need_soft_reset = false;
...@@ -673,13 +671,25 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -673,13 +671,25 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
goto out; goto out;
} }
chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
num_chunks_execute = args->in.num_chunks_execute;
if (!num_chunks_execute) {
dev_err(hdev->dev,
"Got execute CS with 0 chunks, context %d\n",
ctx->asid);
rc = -EINVAL;
goto out;
}
do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
long ret; long ret;
chunks = (void __user *)(uintptr_t)args->in.chunks_restore; chunks_restore =
num_chunks = args->in.num_chunks_restore; (void __user *) (uintptr_t) args->in.chunks_restore;
num_chunks_restore = args->in.num_chunks_restore;
mutex_lock(&hpriv->restore_phase_mutex); mutex_lock(&hpriv->restore_phase_mutex);
...@@ -707,13 +717,13 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -707,13 +717,13 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
hdev->asic_funcs->restore_phase_topology(hdev); hdev->asic_funcs->restore_phase_topology(hdev);
if (num_chunks == 0) { if (!num_chunks_restore) {
dev_dbg(hdev->dev, dev_dbg(hdev->dev,
"Need to run restore phase but restore CS is empty\n"); "Need to run restore phase but restore CS is empty\n");
rc = 0; rc = 0;
} else { } else {
rc = _hl_cs_ioctl(hpriv, chunks, num_chunks, rc = _hl_cs_ioctl(hpriv, chunks_restore,
&cs_seq); num_chunks_restore, &cs_seq);
} }
mutex_unlock(&hpriv->restore_phase_mutex); mutex_unlock(&hpriv->restore_phase_mutex);
...@@ -726,7 +736,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -726,7 +736,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
} }
/* Need to wait for restore completion before execution phase */ /* Need to wait for restore completion before execution phase */
if (num_chunks > 0) { if (num_chunks_restore) {
ret = _hl_cs_wait_ioctl(hdev, ctx, ret = _hl_cs_wait_ioctl(hdev, ctx,
jiffies_to_usecs(hdev->timeout_jiffies), jiffies_to_usecs(hdev->timeout_jiffies),
cs_seq); cs_seq);
...@@ -754,18 +764,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -754,18 +764,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
} }
} }
chunks = (void __user *)(uintptr_t)args->in.chunks_execute; rc = _hl_cs_ioctl(hpriv, chunks_execute, num_chunks_execute, &cs_seq);
num_chunks = args->in.num_chunks_execute;
if (num_chunks == 0) {
dev_err(hdev->dev,
"Got execute CS with 0 chunks, context %d\n",
ctx->asid);
rc = -EINVAL;
goto out;
}
rc = _hl_cs_ioctl(hpriv, chunks, num_chunks, &cs_seq);
out: out:
if (rc != -EAGAIN) { if (rc != -EAGAIN) {
......
...@@ -393,9 +393,10 @@ static int mmu_show(struct seq_file *s, void *data) ...@@ -393,9 +393,10 @@ static int mmu_show(struct seq_file *s, void *data)
} }
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->va_space_dram_start_address, prop->dmmu.start_addr,
prop->va_space_dram_end_address); prop->dmmu.end_addr);
/* shifts and masks are the same in PMMU and HPMMU, use one of them */
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
mutex_lock(&ctx->mmu_lock); mutex_lock(&ctx->mmu_lock);
...@@ -547,12 +548,15 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr) ...@@ -547,12 +548,15 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
goto out; goto out;
if (hdev->dram_supports_virtual_memory && if (hdev->dram_supports_virtual_memory &&
addr >= prop->va_space_dram_start_address && (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
addr < prop->va_space_dram_end_address)
return true; return true;
if (addr >= prop->va_space_host_start_address && if (addr >= prop->pmmu.start_addr &&
addr < prop->va_space_host_end_address) addr < prop->pmmu.end_addr)
return true;
if (addr >= prop->pmmu_huge.start_addr &&
addr < prop->pmmu_huge.end_addr)
return true; return true;
out: out:
return false; return false;
...@@ -575,9 +579,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, ...@@ -575,9 +579,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
} }
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->va_space_dram_start_address, prop->dmmu.start_addr,
prop->va_space_dram_end_address); prop->dmmu.end_addr);
/* shifts and masks are the same in PMMU and HPMMU, use one of them */
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
mutex_lock(&ctx->mmu_lock); mutex_lock(&ctx->mmu_lock);
...@@ -705,6 +710,65 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, ...@@ -705,6 +710,65 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
return count; return count;
} }
static ssize_t hl_data_read64(struct file *f, char __user *buf,
size_t count, loff_t *ppos)
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
char tmp_buf[32];
u64 addr = entry->addr;
u64 val;
ssize_t rc;
if (*ppos)
return 0;
if (hl_is_device_va(hdev, addr)) {
rc = device_va_to_pa(hdev, addr, &addr);
if (rc)
return rc;
}
rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val);
if (rc) {
dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
return rc;
}
sprintf(tmp_buf, "0x%016llx\n", val);
return simple_read_from_buffer(buf, count, ppos, tmp_buf,
strlen(tmp_buf));
}
static ssize_t hl_data_write64(struct file *f, const char __user *buf,
size_t count, loff_t *ppos)
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
u64 addr = entry->addr;
u64 value;
ssize_t rc;
rc = kstrtoull_from_user(buf, count, 16, &value);
if (rc)
return rc;
if (hl_is_device_va(hdev, addr)) {
rc = device_va_to_pa(hdev, addr, &addr);
if (rc)
return rc;
}
rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value);
if (rc) {
dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n",
value, addr);
return rc;
}
return count;
}
static ssize_t hl_get_power_state(struct file *f, char __user *buf, static ssize_t hl_get_power_state(struct file *f, char __user *buf,
size_t count, loff_t *ppos) size_t count, loff_t *ppos)
{ {
...@@ -912,6 +976,12 @@ static const struct file_operations hl_data32b_fops = { ...@@ -912,6 +976,12 @@ static const struct file_operations hl_data32b_fops = {
.write = hl_data_write32 .write = hl_data_write32
}; };
static const struct file_operations hl_data64b_fops = {
.owner = THIS_MODULE,
.read = hl_data_read64,
.write = hl_data_write64
};
static const struct file_operations hl_i2c_data_fops = { static const struct file_operations hl_i2c_data_fops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.read = hl_i2c_data_read, .read = hl_i2c_data_read,
...@@ -1025,6 +1095,12 @@ void hl_debugfs_add_device(struct hl_device *hdev) ...@@ -1025,6 +1095,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry, dev_entry,
&hl_data32b_fops); &hl_data32b_fops);
debugfs_create_file("data64",
0644,
dev_entry->root,
dev_entry,
&hl_data64b_fops);
debugfs_create_file("set_power_state", debugfs_create_file("set_power_state",
0200, 0200,
dev_entry->root, dev_entry->root,
......
...@@ -36,7 +36,7 @@ enum hl_device_status hl_device_status(struct hl_device *hdev) ...@@ -36,7 +36,7 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
status = HL_DEVICE_STATUS_OPERATIONAL; status = HL_DEVICE_STATUS_OPERATIONAL;
return status; return status;
}; }
static void hpriv_release(struct kref *ref) static void hpriv_release(struct kref *ref)
{ {
......
...@@ -324,7 +324,11 @@ static u32 goya_all_events[] = { ...@@ -324,7 +324,11 @@ static u32 goya_all_events[] = {
GOYA_ASYNC_EVENT_ID_DMA_BM_CH1, GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
GOYA_ASYNC_EVENT_ID_DMA_BM_CH2, GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
GOYA_ASYNC_EVENT_ID_DMA_BM_CH3, GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
GOYA_ASYNC_EVENT_ID_DMA_BM_CH4 GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
}; };
static int goya_mmu_clear_pgt_range(struct hl_device *hdev); static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
...@@ -393,19 +397,21 @@ void goya_get_fixed_properties(struct hl_device *hdev) ...@@ -393,19 +397,21 @@ void goya_get_fixed_properties(struct hl_device *hdev)
prop->dmmu.hop2_mask = HOP2_MASK; prop->dmmu.hop2_mask = HOP2_MASK;
prop->dmmu.hop3_mask = HOP3_MASK; prop->dmmu.hop3_mask = HOP3_MASK;
prop->dmmu.hop4_mask = HOP4_MASK; prop->dmmu.hop4_mask = HOP4_MASK;
prop->dmmu.huge_page_size = PAGE_SIZE_2MB; prop->dmmu.start_addr = VA_DDR_SPACE_START;
prop->dmmu.end_addr = VA_DDR_SPACE_END;
prop->dmmu.page_size = PAGE_SIZE_2MB;
/* No difference between PMMU and DMMU except of page size */ /* shifts and masks are the same in PMMU and DMMU */
memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu)); memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
prop->dmmu.page_size = PAGE_SIZE_2MB; prop->pmmu.start_addr = VA_HOST_SPACE_START;
prop->pmmu.end_addr = VA_HOST_SPACE_END;
prop->pmmu.page_size = PAGE_SIZE_4KB; prop->pmmu.page_size = PAGE_SIZE_4KB;
prop->va_space_host_start_address = VA_HOST_SPACE_START; /* PMMU and HPMMU are the same except of page size */
prop->va_space_host_end_address = VA_HOST_SPACE_END; memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
prop->va_space_dram_start_address = VA_DDR_SPACE_START; prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
prop->va_space_dram_end_address = VA_DDR_SPACE_END;
prop->dram_size_for_default_page_mapping = prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
prop->va_space_dram_end_address;
prop->cfg_size = CFG_SIZE; prop->cfg_size = CFG_SIZE;
prop->max_asid = MAX_ASID; prop->max_asid = MAX_ASID;
prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE; prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
...@@ -2573,8 +2579,7 @@ static int goya_hw_init(struct hl_device *hdev) ...@@ -2573,8 +2579,7 @@ static int goya_hw_init(struct hl_device *hdev)
* After CPU initialization is finished, change DDR bar mapping inside * After CPU initialization is finished, change DDR bar mapping inside
* iATU to point to the start address of the MMU page tables * iATU to point to the start address of the MMU page tables
*/ */
if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE + if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
(MMU_PAGE_TABLES_ADDR &
~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) { ~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
dev_err(hdev->dev, dev_err(hdev->dev,
"failed to map DDR bar to MMU page tables\n"); "failed to map DDR bar to MMU page tables\n");
...@@ -3443,12 +3448,13 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev, ...@@ -3443,12 +3448,13 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
/* /*
* WA for HW-23. * WA for HW-23.
* We can't allow user to read from Host using QMANs other than 1. * We can't allow user to read from Host using QMANs other than 1.
* PMMU and HPMMU addresses are equal, check only one of them.
*/ */
if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 && if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr), hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
le32_to_cpu(user_dma_pkt->tsize), le32_to_cpu(user_dma_pkt->tsize),
hdev->asic_prop.va_space_host_start_address, hdev->asic_prop.pmmu.start_addr,
hdev->asic_prop.va_space_host_end_address)) { hdev->asic_prop.pmmu.end_addr)) {
dev_err(hdev->dev, dev_err(hdev->dev,
"Can't DMA from host on queue other then 1\n"); "Can't DMA from host on queue other then 1\n");
return -EFAULT; return -EFAULT;
...@@ -4178,6 +4184,96 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) ...@@ -4178,6 +4184,96 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
return rc; return rc;
} }
static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 ddr_bar_addr;
int rc = 0;
if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
u32 val_l = RREG32(addr - CFG_BASE);
u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
*val = (((u64) val_h) << 32) | val_l;
} else if ((addr >= SRAM_BASE_ADDR) &&
(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
*val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
(addr - SRAM_BASE_ADDR));
} else if ((addr >= DRAM_PHYS_BASE) &&
(addr <=
DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) {
u64 bar_base_addr = DRAM_PHYS_BASE +
(addr & ~(prop->dram_pci_bar_size - 0x1ull));
ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
if (ddr_bar_addr != U64_MAX) {
*val = readq(hdev->pcie_bar[DDR_BAR_ID] +
(addr - bar_base_addr));
ddr_bar_addr = goya_set_ddr_bar_base(hdev,
ddr_bar_addr);
}
if (ddr_bar_addr == U64_MAX)
rc = -EIO;
} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
} else {
rc = -EFAULT;
}
return rc;
}
static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 ddr_bar_addr;
int rc = 0;
if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
WREG32(addr - CFG_BASE, lower_32_bits(val));
WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
} else if ((addr >= SRAM_BASE_ADDR) &&
(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
(addr - SRAM_BASE_ADDR));
} else if ((addr >= DRAM_PHYS_BASE) &&
(addr <=
DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) {
u64 bar_base_addr = DRAM_PHYS_BASE +
(addr & ~(prop->dram_pci_bar_size - 0x1ull));
ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
if (ddr_bar_addr != U64_MAX) {
writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
(addr - bar_base_addr));
ddr_bar_addr = goya_set_ddr_bar_base(hdev,
ddr_bar_addr);
}
if (ddr_bar_addr == U64_MAX)
rc = -EIO;
} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
} else {
rc = -EFAULT;
}
return rc;
}
static u64 goya_read_pte(struct hl_device *hdev, u64 addr) static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
{ {
struct goya_device *goya = hdev->asic_specific; struct goya_device *goya = hdev->asic_specific;
...@@ -4297,6 +4393,14 @@ static const char *_goya_get_event_desc(u16 event_type) ...@@ -4297,6 +4393,14 @@ static const char *_goya_get_event_desc(u16 event_type)
return "TPC%d_bmon_spmu"; return "TPC%d_bmon_spmu";
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4: case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
return "DMA_bm_ch%d"; return "DMA_bm_ch%d";
case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
return "POWER_ENV_S";
case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
return "POWER_ENV_E";
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
return "THERMAL_ENV_S";
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
return "THERMAL_ENV_E";
default: default:
return "N/A"; return "N/A";
} }
...@@ -4388,22 +4492,22 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size) ...@@ -4388,22 +4492,22 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
static void goya_print_razwi_info(struct hl_device *hdev) static void goya_print_razwi_info(struct hl_device *hdev)
{ {
if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) { if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
dev_err(hdev->dev, "Illegal write to LBW\n"); dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0); WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
} }
if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) { if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
dev_err(hdev->dev, "Illegal read from LBW\n"); dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0); WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
} }
if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) { if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
dev_err(hdev->dev, "Illegal write to HBW\n"); dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0); WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
} }
if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) { if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
dev_err(hdev->dev, "Illegal read from HBW\n"); dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0); WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
} }
} }
...@@ -4423,7 +4527,8 @@ static void goya_print_mmu_error_info(struct hl_device *hdev) ...@@ -4423,7 +4527,8 @@ static void goya_print_mmu_error_info(struct hl_device *hdev)
addr <<= 32; addr <<= 32;
addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA); addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);
dev_err(hdev->dev, "MMU page fault on va 0x%llx\n", addr); dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
addr);
WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0); WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
} }
...@@ -4435,7 +4540,7 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type, ...@@ -4435,7 +4540,7 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
char desc[20] = ""; char desc[20] = "";
goya_get_event_desc(event_type, desc, sizeof(desc)); goya_get_event_desc(event_type, desc, sizeof(desc));
dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
event_type, desc); event_type, desc);
if (razwi) { if (razwi) {
...@@ -4526,6 +4631,33 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type) ...@@ -4526,6 +4631,33 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
return rc; return rc;
} }
static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
{
switch (event_type) {
case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
dev_info_ratelimited(hdev->dev,
"Clock throttling due to power consumption\n");
break;
case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
dev_info_ratelimited(hdev->dev,
"Power envelop is safe, back to optimal clock\n");
break;
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
dev_info_ratelimited(hdev->dev,
"Clock throttling due to overheating\n");
break;
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
dev_info_ratelimited(hdev->dev,
"Thermal envelop is safe, back to optimal clock\n");
break;
default:
dev_err(hdev->dev, "Received invalid clock change event %d\n",
event_type);
break;
}
}
void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{ {
u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
...@@ -4609,6 +4741,14 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) ...@@ -4609,6 +4741,14 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
goya_unmask_irq(hdev, event_type); goya_unmask_irq(hdev, event_type);
break; break;
case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
goya_print_clk_change_info(hdev, event_type);
goya_unmask_irq(hdev, event_type);
break;
default: default:
dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
event_type); event_type);
...@@ -4776,7 +4916,8 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev) ...@@ -4776,7 +4916,8 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) { for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off, rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
prop->dram_base_address + off, PAGE_SIZE_2MB); prop->dram_base_address + off, PAGE_SIZE_2MB,
(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
if (rc) { if (rc) {
dev_err(hdev->dev, "Map failed for address 0x%llx\n", dev_err(hdev->dev, "Map failed for address 0x%llx\n",
prop->dram_base_address + off); prop->dram_base_address + off);
...@@ -4786,7 +4927,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev) ...@@ -4786,7 +4927,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) { if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR, rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB); hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);
if (rc) { if (rc) {
dev_err(hdev->dev, dev_err(hdev->dev,
...@@ -4799,7 +4940,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev) ...@@ -4799,7 +4940,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
rc = hl_mmu_map(hdev->kernel_ctx, rc = hl_mmu_map(hdev->kernel_ctx,
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off, VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
hdev->cpu_accessible_dma_address + cpu_off, hdev->cpu_accessible_dma_address + cpu_off,
PAGE_SIZE_4KB); PAGE_SIZE_4KB, true);
if (rc) { if (rc) {
dev_err(hdev->dev, dev_err(hdev->dev,
"Map failed for CPU accessible memory\n"); "Map failed for CPU accessible memory\n");
...@@ -4825,14 +4966,15 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev) ...@@ -4825,14 +4966,15 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB) for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
if (hl_mmu_unmap(hdev->kernel_ctx, if (hl_mmu_unmap(hdev->kernel_ctx,
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off, VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
PAGE_SIZE_4KB)) PAGE_SIZE_4KB, true))
dev_warn_ratelimited(hdev->dev, dev_warn_ratelimited(hdev->dev,
"failed to unmap address 0x%llx\n", "failed to unmap address 0x%llx\n",
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off); VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
unmap: unmap:
for (; off >= 0 ; off -= PAGE_SIZE_2MB) for (; off >= 0 ; off -= PAGE_SIZE_2MB)
if (hl_mmu_unmap(hdev->kernel_ctx, if (hl_mmu_unmap(hdev->kernel_ctx,
prop->dram_base_address + off, PAGE_SIZE_2MB)) prop->dram_base_address + off, PAGE_SIZE_2MB,
true))
dev_warn_ratelimited(hdev->dev, dev_warn_ratelimited(hdev->dev,
"failed to unmap address 0x%llx\n", "failed to unmap address 0x%llx\n",
prop->dram_base_address + off); prop->dram_base_address + off);
...@@ -4857,14 +4999,15 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev) ...@@ -4857,14 +4999,15 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) { if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR, if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
PAGE_SIZE_2MB)) PAGE_SIZE_2MB, true))
dev_warn(hdev->dev, dev_warn(hdev->dev,
"Failed to unmap CPU accessible memory\n"); "Failed to unmap CPU accessible memory\n");
} else { } else {
for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
if (hl_mmu_unmap(hdev->kernel_ctx, if (hl_mmu_unmap(hdev->kernel_ctx,
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off, VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
PAGE_SIZE_4KB)) PAGE_SIZE_4KB,
(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
dev_warn_ratelimited(hdev->dev, dev_warn_ratelimited(hdev->dev,
"failed to unmap address 0x%llx\n", "failed to unmap address 0x%llx\n",
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off); VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
...@@ -4872,7 +5015,8 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev) ...@@ -4872,7 +5015,8 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
if (hl_mmu_unmap(hdev->kernel_ctx, if (hl_mmu_unmap(hdev->kernel_ctx,
prop->dram_base_address + off, PAGE_SIZE_2MB)) prop->dram_base_address + off, PAGE_SIZE_2MB,
(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
dev_warn_ratelimited(hdev->dev, dev_warn_ratelimited(hdev->dev,
"Failed to unmap address 0x%llx\n", "Failed to unmap address 0x%llx\n",
prop->dram_base_address + off); prop->dram_base_address + off);
...@@ -5113,6 +5257,7 @@ static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask, ...@@ -5113,6 +5257,7 @@ static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
} }
static void goya_hw_queues_lock(struct hl_device *hdev) static void goya_hw_queues_lock(struct hl_device *hdev)
__acquires(&goya->hw_queues_lock)
{ {
struct goya_device *goya = hdev->asic_specific; struct goya_device *goya = hdev->asic_specific;
...@@ -5120,6 +5265,7 @@ static void goya_hw_queues_lock(struct hl_device *hdev) ...@@ -5120,6 +5265,7 @@ static void goya_hw_queues_lock(struct hl_device *hdev)
} }
static void goya_hw_queues_unlock(struct hl_device *hdev) static void goya_hw_queues_unlock(struct hl_device *hdev)
__releases(&goya->hw_queues_lock)
{ {
struct goya_device *goya = hdev->asic_specific; struct goya_device *goya = hdev->asic_specific;
...@@ -5180,6 +5326,8 @@ static const struct hl_asic_funcs goya_funcs = { ...@@ -5180,6 +5326,8 @@ static const struct hl_asic_funcs goya_funcs = {
.restore_phase_topology = goya_restore_phase_topology, .restore_phase_topology = goya_restore_phase_topology,
.debugfs_read32 = goya_debugfs_read32, .debugfs_read32 = goya_debugfs_read32,
.debugfs_write32 = goya_debugfs_write32, .debugfs_write32 = goya_debugfs_write32,
.debugfs_read64 = goya_debugfs_read64,
.debugfs_write64 = goya_debugfs_write64,
.add_device_attr = goya_add_device_attr, .add_device_attr = goya_add_device_attr,
.handle_eqe = goya_handle_eqe, .handle_eqe = goya_handle_eqe,
.set_pll_profile = goya_set_pll_profile, .set_pll_profile = goya_set_pll_profile,
......
...@@ -364,8 +364,8 @@ static int goya_etr_validate_address(struct hl_device *hdev, u64 addr, ...@@ -364,8 +364,8 @@ static int goya_etr_validate_address(struct hl_device *hdev, u64 addr,
u64 range_start, range_end; u64 range_start, range_end;
if (hdev->mmu_enable) { if (hdev->mmu_enable) {
range_start = prop->va_space_dram_start_address; range_start = prop->dmmu.start_addr;
range_end = prop->va_space_dram_end_address; range_end = prop->dmmu.end_addr;
} else { } else {
range_start = prop->dram_user_base_address; range_start = prop->dram_user_base_address;
range_end = prop->dram_end_address; range_end = prop->dram_end_address;
......
...@@ -298,8 +298,8 @@ static ssize_t pm_mng_profile_store(struct device *dev, ...@@ -298,8 +298,8 @@ static ssize_t pm_mng_profile_store(struct device *dev,
/* Make sure we are in LOW PLL when changing modes */ /* Make sure we are in LOW PLL when changing modes */
if (hdev->pm_mng_profile == PM_MANUAL) { if (hdev->pm_mng_profile == PM_MANUAL) {
hdev->curr_pll_profile = PLL_HIGH; hdev->curr_pll_profile = PLL_HIGH;
hl_device_set_frequency(hdev, PLL_LOW);
hdev->pm_mng_profile = PM_AUTO; hdev->pm_mng_profile = PM_AUTO;
hl_device_set_frequency(hdev, PLL_LOW);
} }
} else if (strncmp("manual", buf, strlen("manual")) == 0) { } else if (strncmp("manual", buf, strlen("manual")) == 0) {
if (hdev->pm_mng_profile == PM_AUTO) { if (hdev->pm_mng_profile == PM_AUTO) {
......
...@@ -132,6 +132,8 @@ enum hl_device_hw_state { ...@@ -132,6 +132,8 @@ enum hl_device_hw_state {
/** /**
* struct hl_mmu_properties - ASIC specific MMU address translation properties. * struct hl_mmu_properties - ASIC specific MMU address translation properties.
* @start_addr: virtual start address of the memory region.
* @end_addr: virtual end address of the memory region.
* @hop0_shift: shift of hop 0 mask. * @hop0_shift: shift of hop 0 mask.
* @hop1_shift: shift of hop 1 mask. * @hop1_shift: shift of hop 1 mask.
* @hop2_shift: shift of hop 2 mask. * @hop2_shift: shift of hop 2 mask.
...@@ -143,9 +145,10 @@ enum hl_device_hw_state { ...@@ -143,9 +145,10 @@ enum hl_device_hw_state {
* @hop3_mask: mask to get the PTE address in hop 3. * @hop3_mask: mask to get the PTE address in hop 3.
* @hop4_mask: mask to get the PTE address in hop 4. * @hop4_mask: mask to get the PTE address in hop 4.
* @page_size: default page size used to allocate memory. * @page_size: default page size used to allocate memory.
* @huge_page_size: page size used to allocate memory with huge pages.
*/ */
struct hl_mmu_properties { struct hl_mmu_properties {
u64 start_addr;
u64 end_addr;
u64 hop0_shift; u64 hop0_shift;
u64 hop1_shift; u64 hop1_shift;
u64 hop2_shift; u64 hop2_shift;
...@@ -157,7 +160,6 @@ struct hl_mmu_properties { ...@@ -157,7 +160,6 @@ struct hl_mmu_properties {
u64 hop3_mask; u64 hop3_mask;
u64 hop4_mask; u64 hop4_mask;
u32 page_size; u32 page_size;
u32 huge_page_size;
}; };
/** /**
...@@ -169,6 +171,8 @@ struct hl_mmu_properties { ...@@ -169,6 +171,8 @@ struct hl_mmu_properties {
* @preboot_ver: F/W Preboot version. * @preboot_ver: F/W Preboot version.
* @dmmu: DRAM MMU address translation properties. * @dmmu: DRAM MMU address translation properties.
* @pmmu: PCI (host) MMU address translation properties. * @pmmu: PCI (host) MMU address translation properties.
* @pmmu_huge: PCI (host) MMU address translation properties for memory
* allocated with huge pages.
* @sram_base_address: SRAM physical start address. * @sram_base_address: SRAM physical start address.
* @sram_end_address: SRAM physical end address. * @sram_end_address: SRAM physical end address.
* @sram_user_base_address - SRAM physical start address for user access. * @sram_user_base_address - SRAM physical start address for user access.
...@@ -178,14 +182,6 @@ struct hl_mmu_properties { ...@@ -178,14 +182,6 @@ struct hl_mmu_properties {
* @dram_size: DRAM total size. * @dram_size: DRAM total size.
* @dram_pci_bar_size: size of PCI bar towards DRAM. * @dram_pci_bar_size: size of PCI bar towards DRAM.
* @max_power_default: max power of the device after reset * @max_power_default: max power of the device after reset
* @va_space_host_start_address: base address of virtual memory range for
* mapping host memory.
* @va_space_host_end_address: end address of virtual memory range for
* mapping host memory.
* @va_space_dram_start_address: base address of virtual memory range for
* mapping DRAM memory.
* @va_space_dram_end_address: end address of virtual memory range for
* mapping DRAM memory.
* @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
* fault. * fault.
* @pcie_dbi_base_address: Base address of the PCIE_DBI block. * @pcie_dbi_base_address: Base address of the PCIE_DBI block.
...@@ -218,6 +214,7 @@ struct asic_fixed_properties { ...@@ -218,6 +214,7 @@ struct asic_fixed_properties {
char preboot_ver[VERSION_MAX_LEN]; char preboot_ver[VERSION_MAX_LEN];
struct hl_mmu_properties dmmu; struct hl_mmu_properties dmmu;
struct hl_mmu_properties pmmu; struct hl_mmu_properties pmmu;
struct hl_mmu_properties pmmu_huge;
u64 sram_base_address; u64 sram_base_address;
u64 sram_end_address; u64 sram_end_address;
u64 sram_user_base_address; u64 sram_user_base_address;
...@@ -227,10 +224,6 @@ struct asic_fixed_properties { ...@@ -227,10 +224,6 @@ struct asic_fixed_properties {
u64 dram_size; u64 dram_size;
u64 dram_pci_bar_size; u64 dram_pci_bar_size;
u64 max_power_default; u64 max_power_default;
u64 va_space_host_start_address;
u64 va_space_host_end_address;
u64 va_space_dram_start_address;
u64 va_space_dram_end_address;
u64 dram_size_for_default_page_mapping; u64 dram_size_for_default_page_mapping;
u64 pcie_dbi_base_address; u64 pcie_dbi_base_address;
u64 pcie_aux_dbi_reg_addr; u64 pcie_aux_dbi_reg_addr;
...@@ -431,10 +424,12 @@ struct hl_eq { ...@@ -431,10 +424,12 @@ struct hl_eq {
* enum hl_asic_type - supported ASIC types. * enum hl_asic_type - supported ASIC types.
* @ASIC_INVALID: Invalid ASIC type. * @ASIC_INVALID: Invalid ASIC type.
* @ASIC_GOYA: Goya device. * @ASIC_GOYA: Goya device.
* @ASIC_GAUDI: Gaudi device.
*/ */
enum hl_asic_type { enum hl_asic_type {
ASIC_INVALID, ASIC_INVALID,
ASIC_GOYA ASIC_GOYA,
ASIC_GAUDI
}; };
struct hl_cs_parser; struct hl_cs_parser;
...@@ -589,6 +584,8 @@ struct hl_asic_funcs { ...@@ -589,6 +584,8 @@ struct hl_asic_funcs {
void (*restore_phase_topology)(struct hl_device *hdev); void (*restore_phase_topology)(struct hl_device *hdev);
int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val); int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val);
int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val); int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val);
int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val);
int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val);
void (*add_device_attr)(struct hl_device *hdev, void (*add_device_attr)(struct hl_device *hdev,
struct attribute_group *dev_attr_grp); struct attribute_group *dev_attr_grp);
void (*handle_eqe)(struct hl_device *hdev, void (*handle_eqe)(struct hl_device *hdev,
...@@ -658,6 +655,8 @@ struct hl_va_range { ...@@ -658,6 +655,8 @@ struct hl_va_range {
* this hits 0l. It is incremented on CS and CS_WAIT. * this hits 0l. It is incremented on CS and CS_WAIT.
* @cs_pending: array of DMA fence objects representing pending CS. * @cs_pending: array of DMA fence objects representing pending CS.
* @host_va_range: holds available virtual addresses for host mappings. * @host_va_range: holds available virtual addresses for host mappings.
* @host_huge_va_range: holds available virtual addresses for host mappings
* with huge pages.
* @dram_va_range: holds available virtual addresses for DRAM mappings. * @dram_va_range: holds available virtual addresses for DRAM mappings.
* @mem_hash_lock: protects the mem_hash. * @mem_hash_lock: protects the mem_hash.
* @mmu_lock: protects the MMU page tables. Any change to the PGT, modifing the * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifing the
...@@ -688,8 +687,9 @@ struct hl_ctx { ...@@ -688,8 +687,9 @@ struct hl_ctx {
struct hl_device *hdev; struct hl_device *hdev;
struct kref refcount; struct kref refcount;
struct dma_fence *cs_pending[HL_MAX_PENDING_CS]; struct dma_fence *cs_pending[HL_MAX_PENDING_CS];
struct hl_va_range host_va_range; struct hl_va_range *host_va_range;
struct hl_va_range dram_va_range; struct hl_va_range *host_huge_va_range;
struct hl_va_range *dram_va_range;
struct mutex mem_hash_lock; struct mutex mem_hash_lock;
struct mutex mmu_lock; struct mutex mmu_lock;
struct list_head debugfs_list; struct list_head debugfs_list;
...@@ -763,7 +763,7 @@ struct hl_userptr { ...@@ -763,7 +763,7 @@ struct hl_userptr {
* @aborted: true if CS was aborted due to some device error. * @aborted: true if CS was aborted due to some device error.
*/ */
struct hl_cs { struct hl_cs {
u8 jobs_in_queue_cnt[HL_MAX_QUEUES]; u16 jobs_in_queue_cnt[HL_MAX_QUEUES];
struct hl_ctx *ctx; struct hl_ctx *ctx;
struct list_head job_list; struct list_head job_list;
spinlock_t job_lock; spinlock_t job_lock;
...@@ -1291,6 +1291,8 @@ struct hl_device_idle_busy_ts { ...@@ -1291,6 +1291,8 @@ struct hl_device_idle_busy_ts {
* otherwise. * otherwise.
* @dram_supports_virtual_memory: is MMU enabled towards DRAM. * @dram_supports_virtual_memory: is MMU enabled towards DRAM.
* @dram_default_page_mapping: is DRAM default page mapping enabled. * @dram_default_page_mapping: is DRAM default page mapping enabled.
* @pmmu_huge_range: is a different virtual addresses range used for PMMU with
* huge pages.
* @init_done: is the initialization of the device done. * @init_done: is the initialization of the device done.
* @mmu_enable: is MMU enabled. * @mmu_enable: is MMU enabled.
* @device_cpu_disabled: is the device CPU disabled (due to timeouts) * @device_cpu_disabled: is the device CPU disabled (due to timeouts)
...@@ -1372,6 +1374,7 @@ struct hl_device { ...@@ -1372,6 +1374,7 @@ struct hl_device {
u8 reset_on_lockup; u8 reset_on_lockup;
u8 dram_supports_virtual_memory; u8 dram_supports_virtual_memory;
u8 dram_default_page_mapping; u8 dram_default_page_mapping;
u8 pmmu_huge_range;
u8 init_done; u8 init_done;
u8 device_cpu_disabled; u8 device_cpu_disabled;
u8 dma_mask; u8 dma_mask;
...@@ -1573,8 +1576,10 @@ int hl_mmu_init(struct hl_device *hdev); ...@@ -1573,8 +1576,10 @@ int hl_mmu_init(struct hl_device *hdev);
void hl_mmu_fini(struct hl_device *hdev); void hl_mmu_fini(struct hl_device *hdev);
int hl_mmu_ctx_init(struct hl_ctx *ctx); int hl_mmu_ctx_init(struct hl_ctx *ctx);
void hl_mmu_ctx_fini(struct hl_ctx *ctx); void hl_mmu_ctx_fini(struct hl_ctx *ctx);
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size); int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size); u32 page_size, bool flush_pte);
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
bool flush_pte);
void hl_mmu_swap_out(struct hl_ctx *ctx); void hl_mmu_swap_out(struct hl_ctx *ctx);
void hl_mmu_swap_in(struct hl_ctx *ctx); void hl_mmu_swap_in(struct hl_ctx *ctx);
...@@ -1606,11 +1611,18 @@ int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask); ...@@ -1606,11 +1611,18 @@ int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask);
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr); int hl_get_temperature(struct hl_device *hdev,
long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr); int sensor_index, u32 attr, long *value);
long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr); int hl_set_temperature(struct hl_device *hdev,
long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr); int sensor_index, u32 attr, long value);
long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr); int hl_get_voltage(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
int hl_get_current(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
int hl_get_fan_speed(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
int hl_get_pwm_info(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
long value); long value);
u64 hl_get_max_power(struct hl_device *hdev); u64 hl_get_max_power(struct hl_device *hdev);
......
...@@ -40,12 +40,13 @@ MODULE_PARM_DESC(reset_on_lockup, ...@@ -40,12 +40,13 @@ MODULE_PARM_DESC(reset_on_lockup,
#define PCI_VENDOR_ID_HABANALABS 0x1da3 #define PCI_VENDOR_ID_HABANALABS 0x1da3
#define PCI_IDS_GOYA 0x0001 #define PCI_IDS_GOYA 0x0001
#define PCI_IDS_GAUDI 0x1000
static const struct pci_device_id ids[] = { static const struct pci_device_id ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), }, { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
{ 0, } { 0, }
}; };
MODULE_DEVICE_TABLE(pci, ids);
/* /*
* get_asic_type - translate device id to asic type * get_asic_type - translate device id to asic type
...@@ -63,6 +64,9 @@ static enum hl_asic_type get_asic_type(u16 device) ...@@ -63,6 +64,9 @@ static enum hl_asic_type get_asic_type(u16 device)
case PCI_IDS_GOYA: case PCI_IDS_GOYA:
asic_type = ASIC_GOYA; asic_type = ASIC_GOYA;
break; break;
case PCI_IDS_GAUDI:
asic_type = ASIC_GAUDI;
break;
default: default:
asic_type = ASIC_INVALID; asic_type = ASIC_INVALID;
break; break;
...@@ -263,6 +267,11 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, ...@@ -263,6 +267,11 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
dev_err(&pdev->dev, "Unsupported ASIC\n"); dev_err(&pdev->dev, "Unsupported ASIC\n");
rc = -ENODEV; rc = -ENODEV;
goto free_hdev; goto free_hdev;
} else if (hdev->asic_type == ASIC_GAUDI) {
dev_err(&pdev->dev,
"GAUDI is not supported by the current kernel\n");
rc = -ENODEV;
goto free_hdev;
} }
} else { } else {
hdev->asic_type = asic_type; hdev->asic_type = asic_type;
......
...@@ -113,6 +113,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, ...@@ -113,6 +113,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel, long *val) u32 attr, int channel, long *val)
{ {
struct hl_device *hdev = dev_get_drvdata(dev); struct hl_device *hdev = dev_get_drvdata(dev);
int rc;
if (hl_device_disabled_or_in_reset(hdev)) if (hl_device_disabled_or_in_reset(hdev))
return -ENODEV; return -ENODEV;
...@@ -125,36 +126,40 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, ...@@ -125,36 +126,40 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
case hwmon_temp_crit: case hwmon_temp_crit:
case hwmon_temp_max_hyst: case hwmon_temp_max_hyst:
case hwmon_temp_crit_hyst: case hwmon_temp_crit_hyst:
case hwmon_temp_offset:
case hwmon_temp_highest:
break; break;
default: default:
return -EINVAL; return -EINVAL;
} }
*val = hl_get_temperature(hdev, channel, attr); rc = hl_get_temperature(hdev, channel, attr, val);
break; break;
case hwmon_in: case hwmon_in:
switch (attr) { switch (attr) {
case hwmon_in_input: case hwmon_in_input:
case hwmon_in_min: case hwmon_in_min:
case hwmon_in_max: case hwmon_in_max:
case hwmon_in_highest:
break; break;
default: default:
return -EINVAL; return -EINVAL;
} }
*val = hl_get_voltage(hdev, channel, attr); rc = hl_get_voltage(hdev, channel, attr, val);
break; break;
case hwmon_curr: case hwmon_curr:
switch (attr) { switch (attr) {
case hwmon_curr_input: case hwmon_curr_input:
case hwmon_curr_min: case hwmon_curr_min:
case hwmon_curr_max: case hwmon_curr_max:
case hwmon_curr_highest:
break; break;
default: default:
return -EINVAL; return -EINVAL;
} }
*val = hl_get_current(hdev, channel, attr); rc = hl_get_current(hdev, channel, attr, val);
break; break;
case hwmon_fan: case hwmon_fan:
switch (attr) { switch (attr) {
...@@ -165,7 +170,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, ...@@ -165,7 +170,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
default: default:
return -EINVAL; return -EINVAL;
} }
*val = hl_get_fan_speed(hdev, channel, attr); rc = hl_get_fan_speed(hdev, channel, attr, val);
break; break;
case hwmon_pwm: case hwmon_pwm:
switch (attr) { switch (attr) {
...@@ -175,12 +180,12 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, ...@@ -175,12 +180,12 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
default: default:
return -EINVAL; return -EINVAL;
} }
*val = hl_get_pwm_info(hdev, channel, attr); rc = hl_get_pwm_info(hdev, channel, attr, val);
break; break;
default: default:
return -EINVAL; return -EINVAL;
} }
return 0; return rc;
} }
static int hl_write(struct device *dev, enum hwmon_sensor_types type, static int hl_write(struct device *dev, enum hwmon_sensor_types type,
...@@ -192,6 +197,15 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type, ...@@ -192,6 +197,15 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
return -ENODEV; return -ENODEV;
switch (type) { switch (type) {
case hwmon_temp:
switch (attr) {
case hwmon_temp_offset:
break;
default:
return -EINVAL;
}
hl_set_temperature(hdev, channel, attr, val);
break;
case hwmon_pwm: case hwmon_pwm:
switch (attr) { switch (attr) {
case hwmon_pwm_input: case hwmon_pwm_input:
...@@ -219,7 +233,10 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type, ...@@ -219,7 +233,10 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
case hwmon_temp_max_hyst: case hwmon_temp_max_hyst:
case hwmon_temp_crit: case hwmon_temp_crit:
case hwmon_temp_crit_hyst: case hwmon_temp_crit_hyst:
case hwmon_temp_highest:
return 0444; return 0444;
case hwmon_temp_offset:
return 0644;
} }
break; break;
case hwmon_in: case hwmon_in:
...@@ -227,6 +244,7 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type, ...@@ -227,6 +244,7 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
case hwmon_in_input: case hwmon_in_input:
case hwmon_in_min: case hwmon_in_min:
case hwmon_in_max: case hwmon_in_max:
case hwmon_in_highest:
return 0444; return 0444;
} }
break; break;
...@@ -235,6 +253,7 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type, ...@@ -235,6 +253,7 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
case hwmon_curr_input: case hwmon_curr_input:
case hwmon_curr_min: case hwmon_curr_min:
case hwmon_curr_max: case hwmon_curr_max:
case hwmon_curr_highest:
return 0444; return 0444;
} }
break; break;
...@@ -265,10 +284,10 @@ static const struct hwmon_ops hl_hwmon_ops = { ...@@ -265,10 +284,10 @@ static const struct hwmon_ops hl_hwmon_ops = {
.write = hl_write .write = hl_write
}; };
long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr) int hl_get_temperature(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{ {
struct armcp_packet pkt; struct armcp_packet pkt;
long result;
int rc; int rc;
memset(&pkt, 0, sizeof(pkt)); memset(&pkt, 0, sizeof(pkt));
...@@ -279,22 +298,47 @@ long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr) ...@@ -279,22 +298,47 @@ long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr)
pkt.type = __cpu_to_le16(attr); pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
SENSORS_PKT_TIMEOUT, &result); SENSORS_PKT_TIMEOUT, value);
if (rc) { if (rc) {
dev_err(hdev->dev, dev_err(hdev->dev,
"Failed to get temperature from sensor %d, error %d\n", "Failed to get temperature from sensor %d, error %d\n",
sensor_index, rc); sensor_index, rc);
result = 0; *value = 0;
} }
return result; return rc;
} }
+int hl_set_temperature(struct hl_device *hdev,
+			int sensor_index, u32 attr, long value)
+{
+	struct armcp_packet pkt;
+	int rc;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET <<
+				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.sensor_index = __cpu_to_le16(sensor_index);
+	pkt.type = __cpu_to_le16(attr);
+	pkt.value = __cpu_to_le64(value);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+						SENSORS_PKT_TIMEOUT, NULL);
+
+	if (rc)
+		dev_err(hdev->dev,
+			"Failed to set temperature of sensor %d, error %d\n",
+			sensor_index, rc);
+
+	return rc;
+}
+
-long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_voltage(struct hl_device *hdev,
+			int sensor_index, u32 attr, long *value)
 {
 	struct armcp_packet pkt;
-	long result;
 	int rc;

 	memset(&pkt, 0, sizeof(pkt));
@@ -305,22 +349,22 @@ long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr)
 	pkt.type = __cpu_to_le16(attr);

 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					SENSORS_PKT_TIMEOUT, &result);
+					SENSORS_PKT_TIMEOUT, value);

 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to get voltage from sensor %d, error %d\n",
 			sensor_index, rc);
-		result = 0;
+		*value = 0;
 	}

-	return result;
+	return rc;
 }

-long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_current(struct hl_device *hdev,
+			int sensor_index, u32 attr, long *value)
 {
 	struct armcp_packet pkt;
-	long result;
 	int rc;

 	memset(&pkt, 0, sizeof(pkt));
@@ -331,22 +375,22 @@ long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr)
 	pkt.type = __cpu_to_le16(attr);

 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					SENSORS_PKT_TIMEOUT, &result);
+					SENSORS_PKT_TIMEOUT, value);

 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to get current from sensor %d, error %d\n",
 			sensor_index, rc);
-		result = 0;
+		*value = 0;
 	}

-	return result;
+	return rc;
 }

-long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_fan_speed(struct hl_device *hdev,
+			int sensor_index, u32 attr, long *value)
 {
 	struct armcp_packet pkt;
-	long result;
 	int rc;

 	memset(&pkt, 0, sizeof(pkt));
@@ -357,22 +401,22 @@ long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr)
 	pkt.type = __cpu_to_le16(attr);

 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					SENSORS_PKT_TIMEOUT, &result);
+					SENSORS_PKT_TIMEOUT, value);

 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to get fan speed from sensor %d, error %d\n",
 			sensor_index, rc);
-		result = 0;
+		*value = 0;
 	}

-	return result;
+	return rc;
 }

-long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_pwm_info(struct hl_device *hdev,
+			int sensor_index, u32 attr, long *value)
 {
 	struct armcp_packet pkt;
-	long result;
 	int rc;

 	memset(&pkt, 0, sizeof(pkt));
@@ -383,16 +427,16 @@ long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr)
 	pkt.type = __cpu_to_le16(attr);

 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					SENSORS_PKT_TIMEOUT, &result);
+					SENSORS_PKT_TIMEOUT, value);

 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to get pwm info from sensor %d, error %d\n",
 			sensor_index, rc);
-		result = 0;
+		*value = 0;
 	}

-	return result;
+	return rc;
 }

 void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
......
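The signature change above (returning an error code and passing the reading back through a pointer) is what lets the sysfs/hwmon path report failures instead of silently returning 0. A minimal caller sketch under that assumption; the surrounding callback shape and the `channel`/`val` names are illustrative, not the driver's exact code:

	long value;
	int rc;

	/* New convention: rc reports success/failure, *value holds the reading */
	rc = hl_get_voltage(hdev, channel, armcp_in_input, &value);
	if (rc)
		return rc;	/* propagate the error instead of reporting 0 */

	*val = value;
	return 0;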
@@ -189,6 +189,10 @@ enum pq_init_status {
  * ArmCP to write to the structure, to prevent data corruption in case of
  * mismatched driver/FW versions.
  *
+ * ARMCP_PACKET_TEMPERATURE_SET -
+ *       Set the value of the offset property of a specified thermal sensor.
+ *       The packet's arguments specify the desired sensor and the field to
+ *       set.
  */

 enum armcp_packet_id {
@@ -214,6 +218,8 @@ enum armcp_packet_id {
 	ARMCP_PACKET_MAX_POWER_GET,		/* sysfs */
 	ARMCP_PACKET_MAX_POWER_SET,		/* sysfs */
 	ARMCP_PACKET_EEPROM_DATA_GET,		/* sysfs */
+	ARMCP_RESERVED,
+	ARMCP_PACKET_TEMPERATURE_SET,		/* sysfs */
 };

 #define ARMCP_PACKET_FENCE_VAL	0xFE8CE7A5
@@ -271,24 +277,32 @@ enum armcp_packet_rc {
 	armcp_packet_fault
 };

+/*
+ * armcp_temp_type should adhere to hwmon_temp_attributes
+ * defined in Linux kernel hwmon.h file
+ */
 enum armcp_temp_type {
 	armcp_temp_input,
 	armcp_temp_max = 6,
 	armcp_temp_max_hyst,
 	armcp_temp_crit,
-	armcp_temp_crit_hyst
+	armcp_temp_crit_hyst,
+	armcp_temp_offset = 19,
+	armcp_temp_highest = 22
 };

 enum armcp_in_attributes {
 	armcp_in_input,
 	armcp_in_min,
-	armcp_in_max
+	armcp_in_max,
+	armcp_in_highest = 7
 };

 enum armcp_curr_attributes {
 	armcp_curr_input,
 	armcp_curr_min,
-	armcp_curr_max
+	armcp_curr_max,
+	armcp_curr_highest = 7
 };

 enum armcp_fan_attributes {
......
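The new `armcp_temp_offset` and `*_highest` values line up with attributes the Linux hwmon core already defines, so the driver's read/write callbacks only have to translate one enum into the other before building the ArmCP packet. A rough sketch of that translation; this helper is hypothetical, not the driver's exact code:

	/* Illustrative mapping from a hwmon temperature attribute to the ArmCP type */
	static u32 temp_attr_to_armcp(u32 attr)
	{
		switch (attr) {
		case hwmon_temp_input:
			return armcp_temp_input;
		case hwmon_temp_highest:
			return armcp_temp_highest;
		case hwmon_temp_offset:
			return armcp_temp_offset;
		default:
			return armcp_temp_input;
		}
	}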
@@ -188,6 +188,10 @@ enum goya_async_event_id {
 	GOYA_ASYNC_EVENT_ID_HALT_MACHINE = 485,
 	GOYA_ASYNC_EVENT_ID_INTS_REGISTER = 486,
 	GOYA_ASYNC_EVENT_ID_SOFT_RESET = 487,
+	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S = 507,
+	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E = 508,
+	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S = 509,
+	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E = 510,
 	GOYA_ASYNC_EVENT_ID_LAST_VALID_ID = 1023,

 	GOYA_ASYNC_EVENT_ID_SIZE
 };
......
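These four events back the new "print upon clock change" behaviour: the _S events fire when the firmware starts throttling the clock because the power or thermal envelope was breached, and the _E events fire when the clock returns to optimal. A hypothetical branch of the Goya event-queue handler, sketched here for illustration only (the exact log strings are an assumption):

	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;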
@@ -11,24 +11,27 @@
 /*
  * PSOC scratch-pad registers
  */
 #define mmCPU_PQ_BASE_ADDR_LOW		mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
 #define mmCPU_PQ_BASE_ADDR_HIGH		mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
 #define mmCPU_EQ_BASE_ADDR_LOW		mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
 #define mmCPU_EQ_BASE_ADDR_HIGH		mmPSOC_GLOBAL_CONF_SCRATCHPAD_3
 #define mmCPU_EQ_LENGTH			mmPSOC_GLOBAL_CONF_SCRATCHPAD_4
 #define mmCPU_PQ_LENGTH			mmPSOC_GLOBAL_CONF_SCRATCHPAD_5
 #define mmCPU_EQ_CI			mmPSOC_GLOBAL_CONF_SCRATCHPAD_6
 #define mmCPU_PQ_INIT_STATUS		mmPSOC_GLOBAL_CONF_SCRATCHPAD_7
 #define mmCPU_CQ_BASE_ADDR_LOW		mmPSOC_GLOBAL_CONF_SCRATCHPAD_8
 #define mmCPU_CQ_BASE_ADDR_HIGH		mmPSOC_GLOBAL_CONF_SCRATCHPAD_9
 #define mmCPU_CQ_LENGTH			mmPSOC_GLOBAL_CONF_SCRATCHPAD_10
+#define mmCPU_BOOT_ERR0			mmPSOC_GLOBAL_CONF_SCRATCHPAD_24
+#define mmCPU_BOOT_ERR1			mmPSOC_GLOBAL_CONF_SCRATCHPAD_25
 #define mmUPD_STS			mmPSOC_GLOBAL_CONF_SCRATCHPAD_26
 #define mmUPD_CMD			mmPSOC_GLOBAL_CONF_SCRATCHPAD_27
 #define mmPREBOOT_VER_OFFSET		mmPSOC_GLOBAL_CONF_SCRATCHPAD_28
 #define mmUBOOT_VER_OFFSET		mmPSOC_GLOBAL_CONF_SCRATCHPAD_29
-#define mmUBOOT_OFFSET			mmPSOC_GLOBAL_CONF_SCRATCHPAD_30
+#define mmRDWR_TEST			mmPSOC_GLOBAL_CONF_SCRATCHPAD_30
 #define mmBTL_ID			mmPSOC_GLOBAL_CONF_SCRATCHPAD_31

 #define mmHW_STATE			mmPSOC_GLOBAL_CONF_APP_STATUS
+#define mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS	mmPSOC_GLOBAL_CONF_WARM_REBOOT

 #endif /* GOYA_REG_MAP_H_ */
@@ -8,20 +8,35 @@
 #ifndef HL_BOOT_IF_H
 #define HL_BOOT_IF_H

+#define LKD_HARD_RESET_MAGIC		0xED7BD694
+
+/* CPU error bits in BOOT_ERROR registers */
+#define CPU_BOOT_ERR0_DRAM_INIT_FAIL		(1 << 0)
+#define CPU_BOOT_ERR0_FIT_CORRUPTED		(1 << 1)
+#define CPU_BOOT_ERR0_TS_INIT_FAIL		(1 << 2)
+#define CPU_BOOT_ERR0_DRAM_SKIPPED		(1 << 3)
+#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED		(1 << 4)
+#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY		(1 << 5)
+#define CPU_BOOT_ERR0_NIC_FW_FAIL		(1 << 6)
+#define CPU_BOOT_ERR0_ENABLED			(1 << 31)
+
 enum cpu_boot_status {
 	CPU_BOOT_STATUS_NA = 0,		/* Default value after reset of chip */
-	CPU_BOOT_STATUS_IN_WFE,
-	CPU_BOOT_STATUS_DRAM_RDY,
-	CPU_BOOT_STATUS_SRAM_AVAIL,
-	CPU_BOOT_STATUS_IN_BTL,		/* BTL is H/W FSM */
-	CPU_BOOT_STATUS_IN_PREBOOT,
-	CPU_BOOT_STATUS_IN_SPL,
-	CPU_BOOT_STATUS_IN_UBOOT,
-	CPU_BOOT_STATUS_DRAM_INIT_FAIL,
-	CPU_BOOT_STATUS_FIT_CORRUPTED,
-	CPU_BOOT_STATUS_UBOOT_NOT_READY,
-	CPU_BOOT_STATUS_RESERVED,
-	CPU_BOOT_STATUS_TS_INIT_FAIL,
+	CPU_BOOT_STATUS_IN_WFE = 1,
+	CPU_BOOT_STATUS_DRAM_RDY = 2,
+	CPU_BOOT_STATUS_SRAM_AVAIL = 3,
+	CPU_BOOT_STATUS_IN_BTL = 4,	/* BTL is H/W FSM */
+	CPU_BOOT_STATUS_IN_PREBOOT = 5,
+	CPU_BOOT_STATUS_IN_SPL = 6,
+	CPU_BOOT_STATUS_IN_UBOOT = 7,
+	CPU_BOOT_STATUS_DRAM_INIT_FAIL,	/* deprecated - will be removed */
+	CPU_BOOT_STATUS_FIT_CORRUPTED,	/* deprecated - will be removed */
+	CPU_BOOT_STATUS_UBOOT_NOT_READY = 10,
+	CPU_BOOT_STATUS_NIC_FW_RDY = 11,
+	CPU_BOOT_STATUS_TS_INIT_FAIL,	/* deprecated - will be removed */
+	CPU_BOOT_STATUS_DRAM_SKIPPED,	/* deprecated - will be removed */
+	CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */
+	CPU_BOOT_STATUS_READY_TO_BOOT = 15,
 };

 enum kmd_msg {
......
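The CPU_BOOT_ERR0 bits let the driver report a specific boot failure instead of a generic timeout, and CPU_BOOT_ERR0_ENABLED marks the register as valid so older firmware (which leaves it at zero) is not misread. A hedged sketch of how such a register might be decoded, using mmCPU_BOOT_ERR0 and the driver's RREG32 accessor; the exact reporting logic is an assumption:

	u32 err_val = RREG32(mmCPU_BOOT_ERR0);

	/* The error register is only meaningful when the firmware set the
	 * ENABLED bit; otherwise it is stale or zero on older firmware.
	 */
	if (err_val & CPU_BOOT_ERR0_ENABLED) {
		if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
			dev_err(hdev->dev,
				"Device boot error - DRAM initialization failed\n");
		if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
			dev_err(hdev->dev,
				"Device boot error - FIT image corrupted\n");
	}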
@@ -530,7 +530,7 @@ static u64 get_va_block(struct hl_device *hdev,
 		 * or not, hence we continue with the biggest possible
 		 * granularity.
 		 */
-		page_size = hdev->asic_prop.pmmu.huge_page_size;
+		page_size = hdev->asic_prop.pmmu_huge.page_size;
 	else
 		page_size = hdev->asic_prop.dmmu.page_size;
@@ -638,13 +638,12 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 				struct hl_userptr *userptr,
 				struct hl_vm_phys_pg_pack **pphys_pg_pack)
 {
-	struct hl_mmu_properties *mmu_prop = &ctx->hdev->asic_prop.pmmu;
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
 	struct scatterlist *sg;
 	dma_addr_t dma_addr;
 	u64 page_mask, total_npages;
 	u32 npages, page_size = PAGE_SIZE,
-		huge_page_size = mmu_prop->huge_page_size;
+		huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
 	bool first = true, is_huge_page_opt = true;
 	int rc, i, j;
 	u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
@@ -747,7 +746,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
 		paddr = phys_pg_pack->pages[i];

-		rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size);
+		rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
+				(i + 1) == phys_pg_pack->npages);
 		if (rc) {
 			dev_err(hdev->dev,
 				"map failed for handle %u, npages: %llu, mapped: %llu",
@@ -765,7 +765,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 err:
 	next_vaddr = vaddr;
 	for (i = 0 ; i < mapped_pg_cnt ; i++) {
-		if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+					(i + 1) == mapped_pg_cnt))
 			dev_warn_ratelimited(hdev->dev,
 				"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
 					phys_pg_pack->handle, next_vaddr,
@@ -794,7 +795,8 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 	next_vaddr = vaddr;

 	for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
-		if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+				(i + 1) == phys_pg_pack->npages))
 			dev_warn_ratelimited(hdev->dev,
 				"unmap failed for vaddr: 0x%llx\n", next_vaddr);
...@@ -853,6 +855,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, ...@@ -853,6 +855,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
struct hl_vm_phys_pg_pack *phys_pg_pack; struct hl_vm_phys_pg_pack *phys_pg_pack;
struct hl_userptr *userptr = NULL; struct hl_userptr *userptr = NULL;
struct hl_vm_hash_node *hnode; struct hl_vm_hash_node *hnode;
struct hl_va_range *va_range;
enum vm_type_t *vm_type; enum vm_type_t *vm_type;
u64 ret_vaddr, hint_addr; u64 ret_vaddr, hint_addr;
u32 handle = 0; u32 handle = 0;
...@@ -924,9 +927,16 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, ...@@ -924,9 +927,16 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
goto hnode_err; goto hnode_err;
} }
ret_vaddr = get_va_block(hdev, if (is_userptr)
is_userptr ? &ctx->host_va_range : &ctx->dram_va_range, if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size)
phys_pg_pack->total_size, hint_addr, is_userptr); va_range = ctx->host_va_range;
else
va_range = ctx->host_huge_va_range;
else
va_range = ctx->dram_va_range;
ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
hint_addr, is_userptr);
if (!ret_vaddr) { if (!ret_vaddr) {
dev_err(hdev->dev, "no available va block for handle %u\n", dev_err(hdev->dev, "no available va block for handle %u\n",
handle); handle);
...@@ -965,10 +975,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, ...@@ -965,10 +975,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
return 0; return 0;
map_err: map_err:
if (add_va_block(hdev, if (add_va_block(hdev, va_range, ret_vaddr,
is_userptr ? &ctx->host_va_range : &ctx->dram_va_range, ret_vaddr + phys_pg_pack->total_size - 1))
ret_vaddr,
ret_vaddr + phys_pg_pack->total_size - 1))
dev_warn(hdev->dev, dev_warn(hdev->dev,
"release va block failed for handle 0x%x, vaddr: 0x%llx\n", "release va block failed for handle 0x%x, vaddr: 0x%llx\n",
handle, ret_vaddr); handle, ret_vaddr);
...@@ -1030,7 +1038,6 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free) ...@@ -1030,7 +1038,6 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
if (*vm_type == VM_TYPE_USERPTR) { if (*vm_type == VM_TYPE_USERPTR) {
is_userptr = true; is_userptr = true;
va_range = &ctx->host_va_range;
userptr = hnode->ptr; userptr = hnode->ptr;
rc = init_phys_pg_pack_from_userptr(ctx, userptr, rc = init_phys_pg_pack_from_userptr(ctx, userptr,
&phys_pg_pack); &phys_pg_pack);
...@@ -1040,9 +1047,15 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free) ...@@ -1040,9 +1047,15 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
vaddr); vaddr);
goto vm_type_err; goto vm_type_err;
} }
if (phys_pg_pack->page_size ==
hdev->asic_prop.pmmu.page_size)
va_range = ctx->host_va_range;
else
va_range = ctx->host_huge_va_range;
} else if (*vm_type == VM_TYPE_PHYS_PACK) { } else if (*vm_type == VM_TYPE_PHYS_PACK) {
is_userptr = false; is_userptr = false;
va_range = &ctx->dram_va_range; va_range = ctx->dram_va_range;
phys_pg_pack = hnode->ptr; phys_pg_pack = hnode->ptr;
} else { } else {
dev_warn(hdev->dev, dev_warn(hdev->dev,
@@ -1438,19 +1451,18 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
 }

 /*
- * hl_va_range_init - initialize virtual addresses range
- *
- * @hdev      : pointer to the habanalabs device structure
- * @va_range  : pointer to the range to initialize
- * @start     : range start address
- * @end       : range end address
+ * va_range_init - initialize virtual addresses range
+ * @hdev: pointer to the habanalabs device structure
+ * @va_range: pointer to the range to initialize
+ * @start: range start address
+ * @end: range end address
  *
  * This function does the following:
  * - Initializes the virtual addresses list of the given range with the given
  *   addresses.
  */
-static int hl_va_range_init(struct hl_device *hdev,
-		struct hl_va_range *va_range, u64 start, u64 end)
+static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
+			u64 start, u64 end)
 {
 	int rc;
@@ -1485,47 +1497,105 @@ static int hl_va_range_init(struct hl_device *hdev,
 }
/* /*
* hl_vm_ctx_init_with_ranges - initialize virtual memory for context * va_range_fini() - clear a virtual addresses range
* @hdev: pointer to the habanalabs structure
* va_range: pointer to virtual addresses range
* *
* @ctx : pointer to the habanalabs context structure * This function does the following:
* @host_range_start : host virtual addresses range start * - Frees the virtual addresses block list and its lock
* @host_range_end : host virtual addresses range end */
* @dram_range_start : dram virtual addresses range start static void va_range_fini(struct hl_device *hdev,
* @dram_range_end : dram virtual addresses range end struct hl_va_range *va_range)
{
mutex_lock(&va_range->lock);
clear_va_list_locked(hdev, &va_range->list);
mutex_unlock(&va_range->lock);
mutex_destroy(&va_range->lock);
kfree(va_range);
}
/*
* vm_ctx_init_with_ranges() - initialize virtual memory for context
* @ctx: pointer to the habanalabs context structure
* @host_range_start: host virtual addresses range start.
* @host_range_end: host virtual addresses range end.
* @host_huge_range_start: host virtual addresses range start for memory
* allocated with huge pages.
* @host_huge_range_end: host virtual addresses range end for memory allocated
* with huge pages.
* @dram_range_start: dram virtual addresses range start.
* @dram_range_end: dram virtual addresses range end.
* *
* This function initializes the following: * This function initializes the following:
* - MMU for context * - MMU for context
* - Virtual address to area descriptor hashtable * - Virtual address to area descriptor hashtable
* - Virtual block list of available virtual memory * - Virtual block list of available virtual memory
*/ */
static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start, static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
u64 host_range_end, u64 dram_range_start, u64 host_range_start,
u64 dram_range_end) u64 host_range_end,
u64 host_huge_range_start,
u64 host_huge_range_end,
u64 dram_range_start,
u64 dram_range_end)
{ {
struct hl_device *hdev = ctx->hdev; struct hl_device *hdev = ctx->hdev;
int rc; int rc;
ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL);
if (!ctx->host_va_range)
return -ENOMEM;
ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range),
GFP_KERNEL);
if (!ctx->host_huge_va_range) {
rc = -ENOMEM;
goto host_huge_va_range_err;
}
ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL);
if (!ctx->dram_va_range) {
rc = -ENOMEM;
goto dram_va_range_err;
}
rc = hl_mmu_ctx_init(ctx); rc = hl_mmu_ctx_init(ctx);
if (rc) { if (rc) {
dev_err(hdev->dev, "failed to init context %d\n", ctx->asid); dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
return rc; goto mmu_ctx_err;
} }
mutex_init(&ctx->mem_hash_lock); mutex_init(&ctx->mem_hash_lock);
hash_init(ctx->mem_hash); hash_init(ctx->mem_hash);
mutex_init(&ctx->host_va_range.lock); mutex_init(&ctx->host_va_range->lock);
rc = hl_va_range_init(hdev, &ctx->host_va_range, host_range_start, rc = va_range_init(hdev, ctx->host_va_range, host_range_start,
host_range_end); host_range_end);
if (rc) { if (rc) {
dev_err(hdev->dev, "failed to init host vm range\n"); dev_err(hdev->dev, "failed to init host vm range\n");
goto host_vm_err; goto host_page_range_err;
}
if (hdev->pmmu_huge_range) {
mutex_init(&ctx->host_huge_va_range->lock);
rc = va_range_init(hdev, ctx->host_huge_va_range,
host_huge_range_start,
host_huge_range_end);
if (rc) {
dev_err(hdev->dev,
"failed to init host huge vm range\n");
goto host_hpage_range_err;
}
} else {
ctx->host_huge_va_range = ctx->host_va_range;
} }
mutex_init(&ctx->dram_va_range.lock); mutex_init(&ctx->dram_va_range->lock);
rc = hl_va_range_init(hdev, &ctx->dram_va_range, dram_range_start, rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start,
dram_range_end); dram_range_end);
if (rc) { if (rc) {
dev_err(hdev->dev, "failed to init dram vm range\n"); dev_err(hdev->dev, "failed to init dram vm range\n");
...@@ -1537,15 +1607,29 @@ static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start, ...@@ -1537,15 +1607,29 @@ static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
return 0; return 0;
dram_vm_err: dram_vm_err:
mutex_destroy(&ctx->dram_va_range.lock); mutex_destroy(&ctx->dram_va_range->lock);
mutex_lock(&ctx->host_va_range.lock); if (hdev->pmmu_huge_range) {
clear_va_list_locked(hdev, &ctx->host_va_range.list); mutex_lock(&ctx->host_huge_va_range->lock);
mutex_unlock(&ctx->host_va_range.lock); clear_va_list_locked(hdev, &ctx->host_huge_va_range->list);
host_vm_err: mutex_unlock(&ctx->host_huge_va_range->lock);
mutex_destroy(&ctx->host_va_range.lock); }
host_hpage_range_err:
if (hdev->pmmu_huge_range)
mutex_destroy(&ctx->host_huge_va_range->lock);
mutex_lock(&ctx->host_va_range->lock);
clear_va_list_locked(hdev, &ctx->host_va_range->list);
mutex_unlock(&ctx->host_va_range->lock);
host_page_range_err:
mutex_destroy(&ctx->host_va_range->lock);
mutex_destroy(&ctx->mem_hash_lock); mutex_destroy(&ctx->mem_hash_lock);
hl_mmu_ctx_fini(ctx); hl_mmu_ctx_fini(ctx);
mmu_ctx_err:
kfree(ctx->dram_va_range);
dram_va_range_err:
kfree(ctx->host_huge_va_range);
host_huge_va_range_err:
kfree(ctx->host_va_range);
return rc; return rc;
} }
...@@ -1553,8 +1637,8 @@ static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start, ...@@ -1553,8 +1637,8 @@ static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
int hl_vm_ctx_init(struct hl_ctx *ctx) int hl_vm_ctx_init(struct hl_ctx *ctx)
{ {
struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
u64 host_range_start, host_range_end, dram_range_start, u64 host_range_start, host_range_end, host_huge_range_start,
dram_range_end; host_huge_range_end, dram_range_start, dram_range_end;
atomic64_set(&ctx->dram_phys_mem, 0); atomic64_set(&ctx->dram_phys_mem, 0);
...@@ -1566,38 +1650,26 @@ int hl_vm_ctx_init(struct hl_ctx *ctx) ...@@ -1566,38 +1650,26 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
* address of the memory related to the given handle. * address of the memory related to the given handle.
*/ */
if (ctx->hdev->mmu_enable) { if (ctx->hdev->mmu_enable) {
dram_range_start = prop->va_space_dram_start_address; dram_range_start = prop->dmmu.start_addr;
dram_range_end = prop->va_space_dram_end_address; dram_range_end = prop->dmmu.end_addr;
host_range_start = prop->va_space_host_start_address; host_range_start = prop->pmmu.start_addr;
host_range_end = prop->va_space_host_end_address; host_range_end = prop->pmmu.end_addr;
host_huge_range_start = prop->pmmu_huge.start_addr;
host_huge_range_end = prop->pmmu_huge.end_addr;
} else { } else {
dram_range_start = prop->dram_user_base_address; dram_range_start = prop->dram_user_base_address;
dram_range_end = prop->dram_end_address; dram_range_end = prop->dram_end_address;
host_range_start = prop->dram_user_base_address; host_range_start = prop->dram_user_base_address;
host_range_end = prop->dram_end_address; host_range_end = prop->dram_end_address;
host_huge_range_start = prop->dram_user_base_address;
host_huge_range_end = prop->dram_end_address;
} }
return hl_vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end, return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
dram_range_start, dram_range_end); host_huge_range_start,
} host_huge_range_end,
dram_range_start,
/* dram_range_end);
* hl_va_range_fini - clear a virtual addresses range
*
* @hdev : pointer to the habanalabs structure
* va_range : pointer to virtual addresses range
*
* This function does the following:
* - Frees the virtual addresses block list and its lock
*/
static void hl_va_range_fini(struct hl_device *hdev,
struct hl_va_range *va_range)
{
mutex_lock(&va_range->lock);
clear_va_list_locked(hdev, &va_range->list);
mutex_unlock(&va_range->lock);
mutex_destroy(&va_range->lock);
} }
/* /*
...@@ -1664,8 +1736,10 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) ...@@ -1664,8 +1736,10 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
} }
spin_unlock(&vm->idr_lock); spin_unlock(&vm->idr_lock);
hl_va_range_fini(hdev, &ctx->dram_va_range); va_range_fini(hdev, ctx->dram_va_range);
hl_va_range_fini(hdev, &ctx->host_va_range); if (hdev->pmmu_huge_range)
va_range_fini(hdev, ctx->host_huge_va_range);
va_range_fini(hdev, ctx->host_va_range);
mutex_destroy(&ctx->mem_hash_lock); mutex_destroy(&ctx->mem_hash_lock);
hl_mmu_ctx_fini(ctx); hl_mmu_ctx_fini(ctx);
......
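Splitting the host MMU properties means a host mapping now draws its device virtual address from one of two pools, chosen by the page size the allocation ended up with. A condensed sketch of that decision, mirroring the logic added in map_device_va above (variable names follow the diff):

	/* Pick the VA pool that matches the page size of this mapping */
	if (!is_userptr)
		va_range = ctx->dram_va_range;
	else if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size)
		va_range = ctx->host_va_range;		/* regular (e.g. 4KB) pages */
	else
		va_range = ctx->host_huge_va_range;	/* huge (e.g. 2MB) pages */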
...@@ -254,6 +254,15 @@ static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr) ...@@ -254,6 +254,15 @@ static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
return phys_hop_addr + pte_offset; return phys_hop_addr + pte_offset;
} }
static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->dmmu.start_addr,
prop->dmmu.end_addr);
}
static int dram_default_mapping_init(struct hl_ctx *ctx) static int dram_default_mapping_init(struct hl_ctx *ctx)
{ {
struct hl_device *hdev = ctx->hdev; struct hl_device *hdev = ctx->hdev;
...@@ -548,6 +557,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr) ...@@ -548,6 +557,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
curr_pte; curr_pte;
bool is_huge, clear_hop3 = true; bool is_huge, clear_hop3 = true;
/* shifts and masks are the same in PMMU and HPMMU, use one of them */
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
hop0_addr = get_hop0_addr(ctx); hop0_addr = get_hop0_addr(ctx);
...@@ -637,29 +647,27 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr) ...@@ -637,29 +647,27 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
clear_hop3 = true; clear_hop3 = true;
if (!clear_hop3) if (!clear_hop3)
goto flush; goto mapped;
clear_pte(ctx, hop3_pte_addr); clear_pte(ctx, hop3_pte_addr);
if (put_pte(ctx, hop3_addr)) if (put_pte(ctx, hop3_addr))
goto flush; goto mapped;
clear_pte(ctx, hop2_pte_addr); clear_pte(ctx, hop2_pte_addr);
if (put_pte(ctx, hop2_addr)) if (put_pte(ctx, hop2_addr))
goto flush; goto mapped;
clear_pte(ctx, hop1_pte_addr); clear_pte(ctx, hop1_pte_addr);
if (put_pte(ctx, hop1_addr)) if (put_pte(ctx, hop1_addr))
goto flush; goto mapped;
clear_pte(ctx, hop0_pte_addr); clear_pte(ctx, hop0_pte_addr);
} }
flush: mapped:
flush(ctx);
return 0; return 0;
not_mapped: not_mapped:
...@@ -675,6 +683,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr) ...@@ -675,6 +683,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
* @ctx: pointer to the context structure * @ctx: pointer to the context structure
* @virt_addr: virt addr to map from * @virt_addr: virt addr to map from
* @page_size: size of the page to unmap * @page_size: size of the page to unmap
* @flush_pte: whether to do a PCI flush
* *
* This function does the following: * This function does the following:
* - Check that the virt addr is mapped * - Check that the virt addr is mapped
...@@ -685,40 +694,43 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr) ...@@ -685,40 +694,43 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
* changes the MMU hash, it must be protected by a lock. * changes the MMU hash, it must be protected by a lock.
* However, because it maps only a single page, the lock should be implemented * However, because it maps only a single page, the lock should be implemented
* in a higher level in order to protect the entire mapping of the memory area * in a higher level in order to protect the entire mapping of the memory area
*
* For optimization reasons PCI flush may be requested once after unmapping of
* large area.
*/ */
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size) int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
bool flush_pte)
{ {
struct hl_device *hdev = ctx->hdev; struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop; struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop; struct hl_mmu_properties *mmu_prop;
u64 real_virt_addr; u64 real_virt_addr;
u32 real_page_size, npages; u32 real_page_size, npages;
int i, rc; int i, rc = 0;
bool is_dram_addr; bool is_dram_addr;
if (!hdev->mmu_enable) if (!hdev->mmu_enable)
return 0; return 0;
-	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-				prop->va_space_dram_start_address,
-				prop->va_space_dram_end_address);
+	is_dram_addr = is_dram_va(hdev, virt_addr);

-	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+	if (is_dram_addr)
+		mmu_prop = &prop->dmmu;
+	else if ((page_size % prop->pmmu_huge.page_size) == 0)
+		mmu_prop = &prop->pmmu_huge;
+	else
+		mmu_prop = &prop->pmmu;

 	/*
 	 * The H/W handles mapping of specific page sizes. Hence if the page
 	 * size is bigger, we break it to sub-pages and unmap them separately.
 	 */
-	if ((page_size % mmu_prop->huge_page_size) == 0) {
-		real_page_size = mmu_prop->huge_page_size;
-	} else if ((page_size % mmu_prop->page_size) == 0) {
+	if ((page_size % mmu_prop->page_size) == 0) {
 		real_page_size = mmu_prop->page_size;
 	} else {
 		dev_err(hdev->dev,
-			"page size of %u is not %uKB nor %uMB aligned, can't unmap\n",
-			page_size,
-			mmu_prop->page_size >> 10,
-			mmu_prop->huge_page_size >> 20);
+			"page size of %u is not %uKB aligned, can't unmap\n",
+			page_size, mmu_prop->page_size >> 10);

 		return -EFAULT;
 	}
...@@ -729,12 +741,15 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size) ...@@ -729,12 +741,15 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
for (i = 0 ; i < npages ; i++) { for (i = 0 ; i < npages ; i++) {
rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr); rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
if (rc) if (rc)
return rc; break;
real_virt_addr += real_page_size; real_virt_addr += real_page_size;
} }
return 0; if (flush_pte)
flush(ctx);
return rc;
} }
 static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
@@ -753,8 +768,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 		hop4_new = false, is_huge;
 	int rc = -ENOMEM;

-	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
-
 	/*
 	 * This mapping function can map a page or a huge page. For huge page
 	 * there are only 3 hops rather than 4. Currently the DRAM allocation
@@ -762,11 +775,15 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 	 * one of the two page sizes. Since this is a common code for all the
 	 * three cases, we need this huge page check.
 	 */
-	is_huge = page_size == mmu_prop->huge_page_size;
-
-	if (is_dram_addr && !is_huge) {
-		dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
-		return -EFAULT;
+	if (is_dram_addr) {
+		mmu_prop = &prop->dmmu;
+		is_huge = true;
+	} else if (page_size == prop->pmmu_huge.page_size) {
+		mmu_prop = &prop->pmmu_huge;
+		is_huge = true;
+	} else {
+		mmu_prop = &prop->pmmu;
+		is_huge = false;
 	}

 	hop0_addr = get_hop0_addr(ctx);
...@@ -885,8 +902,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, ...@@ -885,8 +902,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
get_pte(ctx, hop3_addr); get_pte(ctx, hop3_addr);
} }
flush(ctx);
return 0; return 0;
err: err:
...@@ -909,6 +924,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, ...@@ -909,6 +924,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
* @virt_addr: virt addr to map from * @virt_addr: virt addr to map from
* @phys_addr: phys addr to map to * @phys_addr: phys addr to map to
* @page_size: physical page size * @page_size: physical page size
* @flush_pte: whether to do a PCI flush
* *
* This function does the following: * This function does the following:
* - Check that the virt addr is not mapped * - Check that the virt addr is not mapped
...@@ -919,8 +935,12 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, ...@@ -919,8 +935,12 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
* changes the MMU hash, it must be protected by a lock. * changes the MMU hash, it must be protected by a lock.
* However, because it maps only a single page, the lock should be implemented * However, because it maps only a single page, the lock should be implemented
* in a higher level in order to protect the entire mapping of the memory area * in a higher level in order to protect the entire mapping of the memory area
*
* For optimization reasons PCI flush may be requested once after mapping of
* large area.
*/ */
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
bool flush_pte)
{ {
struct hl_device *hdev = ctx->hdev; struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop; struct asic_fixed_properties *prop = &hdev->asic_prop;
...@@ -933,26 +953,25 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) ...@@ -933,26 +953,25 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
if (!hdev->mmu_enable) if (!hdev->mmu_enable)
return 0; return 0;
-	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-				prop->va_space_dram_start_address,
-				prop->va_space_dram_end_address);
+	is_dram_addr = is_dram_va(hdev, virt_addr);

-	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+	if (is_dram_addr)
+		mmu_prop = &prop->dmmu;
+	else if ((page_size % prop->pmmu_huge.page_size) == 0)
+		mmu_prop = &prop->pmmu_huge;
+	else
+		mmu_prop = &prop->pmmu;

 	/*
 	 * The H/W handles mapping of specific page sizes. Hence if the page
 	 * size is bigger, we break it to sub-pages and map them separately.
 	 */
-	if ((page_size % mmu_prop->huge_page_size) == 0) {
-		real_page_size = mmu_prop->huge_page_size;
-	} else if ((page_size % mmu_prop->page_size) == 0) {
+	if ((page_size % mmu_prop->page_size) == 0) {
 		real_page_size = mmu_prop->page_size;
 	} else {
 		dev_err(hdev->dev,
-			"page size of %u is not %dKB nor %dMB aligned, can't unmap\n",
-			page_size,
-			mmu_prop->page_size >> 10,
-			mmu_prop->huge_page_size >> 20);
+			"page size of %u is not %uKB aligned, can't unmap\n",
+			page_size, mmu_prop->page_size >> 10);

 		return -EFAULT;
 	}
...@@ -976,6 +995,9 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) ...@@ -976,6 +995,9 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
mapped_cnt++; mapped_cnt++;
} }
if (flush_pte)
flush(ctx);
return 0; return 0;
err: err:
...@@ -988,6 +1010,8 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) ...@@ -988,6 +1010,8 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
real_virt_addr += real_page_size; real_virt_addr += real_page_size;
} }
flush(ctx);
return rc; return rc;
} }
......
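With the TLB flush moved out of the per-page path, a caller that maps or unmaps a large area requests the expensive cache flush only once, on the last page. A minimal caller sketch under that assumption; the loop shape matches map_phys_pg_pack above and the variable names are illustrative:

	u64 va = vaddr, pa = paddr;
	u32 i;
	int rc;

	for (i = 0 ; i < npages ; i++) {
		/* Request the MMU cache flush only on the final page */
		rc = hl_mmu_map(ctx, va, pa, page_size,
				(i + 1) == npages);
		if (rc)
			break;

		va += page_size;
		pa += page_size;
	}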