Commit 9d20328d authored by Greg Kroah-Hartman

Merge tag 'misc-habanalabs-next-2020-03-24' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next

Oded writes:

This tag contains the following changes for kernel 5.7:

- MMU code improvements, including:
  - Flush the MMU TLB cache only once, at the end of the map/unmap
    function, instead of after the mapping of every page.
  - Prepare support for future ASICs by splitting the ASIC properties
    that describe host-memory mapping into regular-page and huge-page
    variants.

- Add a debugfs interface to read and write 64-bit values from the device's
  memory/registers. Previously the driver only provided an interface for
  32-bit values; the wider accesses let the user debug much more quickly.
  We measured a speedup of around 1.5x-1.7x when reading internal memories.

- Support temperature offset via sysfs as defined in
  https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface

- Display the historical maximum of various sensors.

- Print to the kernel log when clock throttling occurs due to a breach of the
  power or thermal envelope, and print again when clock throttling finishes
  (clock is back to optimal).

- Fix a bug when moving from manual to auto power-management mode.

- Print an "unsupported device" message to the kernel log when a GAUDI device
  is recognized.

- Small bug fixes and minor code improvements.

* tag 'misc-habanalabs-next-2020-03-24' of git://people.freedesktop.org/~gabbayo/linux:
  habanalabs: fix pm manual->auto in GOYA
  habanalabs: show unsupported message for GAUDI
  habanalabs: add print upon clock change
  habanalabs: update goya firmware register map
  habanalabs: Add missing annotation for goya_hw_queues_unlock()
  habanalabs: Add missing annotation for goya_hw_queues_lock()
  habanalabs: Remove unused parse_cnt variable
  habanalabs: provide historical maximum of various sensors
  habanalabs: modify the return values of hl_read/write routines
  habanalabs: support temperature offset via sysfs
  habanalabs: ratelimit error prints of IRQs
  habanalabs: add debugfs write64/read64
  habanalabs: fix DDR bar address setting
  habanalabs: removing extra ;
  habanalabs: Avoid running restore chunks if no execute chunks
  habanalabs: Modify CS jobs counter to u16
  habanalabs: split the host MMU properties
  habanalabs: use the user CB size as a default job size
  habanalabs: flush only at the end of the map/unmap
parents bbde5709 11845501
......@@ -43,6 +43,20 @@ Description: Allows the root user to read or write directly through the
If the IOMMU is disabled, it also allows the root user to read
or write from the host a device VA of a host mapped memory
What: /sys/kernel/debug/habanalabs/hl<n>/data64
Date: Jan 2020
KernelVersion: 5.6
Contact: oded.gabbay@gmail.com
Description: Allows the root user to read or write 64 bit data directly
through the device's PCI bar. Writing to this file generates a
write transaction while reading from the file generates a read
transaction. This custom interface is needed (instead of using
the generic Linux user-space PCI mapping) because the DDR bar
is very small compared to the DDR memory and only the driver can
move the bar before and after the transaction.
If the IOMMU is disabled, it also allows the root user to read
or write from the host a device VA of a host mapped memory
What: /sys/kernel/debug/habanalabs/hl<n>/device
Date: Jan 2019
KernelVersion: 5.1
......
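For illustration only, here is a minimal user-space sketch of how the data64 entry above pairs with the driver's addr debugfs entry (documented earlier in this ABI file). The device index (hl0) and the address written are arbitrary, and error handling is trimmed:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[32] = {0};
	int fd;

	/* Select the device address for the next data64 access. */
	fd = open("/sys/kernel/debug/habanalabs/hl0/addr", O_WRONLY);
	if (fd < 0)
		return 1;
	write(fd, "0x20000000", 10);
	close(fd);

	/* Each read from data64 issues one 64-bit read transaction. */
	fd = open("/sys/kernel/debug/habanalabs/hl0/data64", O_RDONLY);
	if (fd < 0)
		return 1;
	if (read(fd, buf, sizeof(buf) - 1) > 0)
		printf("value: %s", buf);
	close(fd);

	return 0;
}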
......@@ -129,6 +129,8 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
spin_unlock(&job->user_cb->lock);
hl_cb_put(job->user_cb);
job->user_cb = NULL;
} else if (!rc) {
job->job_cb_size = job->user_cb_size;
}
return rc;
......@@ -507,7 +509,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
struct hl_cb *cb;
bool int_queues_only = true;
u32 size_to_copy;
int rc, i, parse_cnt;
int rc, i;
*cs_seq = ULLONG_MAX;
......@@ -547,7 +549,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
hl_debugfs_add_cs(cs);
/* Validate ALL the CS chunks before submitting the CS */
for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) {
for (i = 0 ; i < num_chunks ; i++) {
struct hl_cs_chunk *chunk = &cs_chunk_array[i];
enum hl_queue_type queue_type;
bool is_kernel_allocated_cb;
......@@ -585,10 +587,6 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
job->cs = cs;
job->user_cb = cb;
job->user_cb_size = chunk->cb_size;
if (is_kernel_allocated_cb)
job->job_cb_size = cb->size;
else
job->job_cb_size = chunk->cb_size;
job->hw_queue_id = chunk->queue_index;
cs->jobs_in_queue_cnt[job->hw_queue_id]++;
......@@ -659,8 +657,8 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
struct hl_device *hdev = hpriv->hdev;
union hl_cs_args *args = data;
struct hl_ctx *ctx = hpriv->ctx;
void __user *chunks;
u32 num_chunks;
void __user *chunks_execute, *chunks_restore;
u32 num_chunks_execute, num_chunks_restore;
u64 cs_seq = ULONG_MAX;
int rc, do_ctx_switch;
bool need_soft_reset = false;
......@@ -673,13 +671,25 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
goto out;
}
chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
num_chunks_execute = args->in.num_chunks_execute;
if (!num_chunks_execute) {
dev_err(hdev->dev,
"Got execute CS with 0 chunks, context %d\n",
ctx->asid);
rc = -EINVAL;
goto out;
}
do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
long ret;
chunks = (void __user *)(uintptr_t)args->in.chunks_restore;
num_chunks = args->in.num_chunks_restore;
chunks_restore =
(void __user *) (uintptr_t) args->in.chunks_restore;
num_chunks_restore = args->in.num_chunks_restore;
mutex_lock(&hpriv->restore_phase_mutex);
......@@ -707,13 +717,13 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
hdev->asic_funcs->restore_phase_topology(hdev);
if (num_chunks == 0) {
if (!num_chunks_restore) {
dev_dbg(hdev->dev,
"Need to run restore phase but restore CS is empty\n");
rc = 0;
} else {
rc = _hl_cs_ioctl(hpriv, chunks, num_chunks,
&cs_seq);
rc = _hl_cs_ioctl(hpriv, chunks_restore,
num_chunks_restore, &cs_seq);
}
mutex_unlock(&hpriv->restore_phase_mutex);
......@@ -726,7 +736,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
}
/* Need to wait for restore completion before execution phase */
if (num_chunks > 0) {
if (num_chunks_restore) {
ret = _hl_cs_wait_ioctl(hdev, ctx,
jiffies_to_usecs(hdev->timeout_jiffies),
cs_seq);
......@@ -754,18 +764,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
}
}
chunks = (void __user *)(uintptr_t)args->in.chunks_execute;
num_chunks = args->in.num_chunks_execute;
if (num_chunks == 0) {
dev_err(hdev->dev,
"Got execute CS with 0 chunks, context %d\n",
ctx->asid);
rc = -EINVAL;
goto out;
}
rc = _hl_cs_ioctl(hpriv, chunks, num_chunks, &cs_seq);
rc = _hl_cs_ioctl(hpriv, chunks_execute, num_chunks_execute, &cs_seq);
out:
if (rc != -EAGAIN) {
......
......@@ -393,9 +393,10 @@ static int mmu_show(struct seq_file *s, void *data)
}
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
prop->dmmu.start_addr,
prop->dmmu.end_addr);
/* shifts and masks are the same in PMMU and HPMMU, use one of them */
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
mutex_lock(&ctx->mmu_lock);
......@@ -547,12 +548,15 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
goto out;
if (hdev->dram_supports_virtual_memory &&
addr >= prop->va_space_dram_start_address &&
addr < prop->va_space_dram_end_address)
(addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
return true;
if (addr >= prop->va_space_host_start_address &&
addr < prop->va_space_host_end_address)
if (addr >= prop->pmmu.start_addr &&
addr < prop->pmmu.end_addr)
return true;
if (addr >= prop->pmmu_huge.start_addr &&
addr < prop->pmmu_huge.end_addr)
return true;
out:
return false;
......@@ -575,9 +579,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
}
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
prop->dmmu.start_addr,
prop->dmmu.end_addr);
/* shifts and masks are the same in PMMU and HPMMU, use one of them */
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
mutex_lock(&ctx->mmu_lock);
......@@ -705,6 +710,65 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
return count;
}
static ssize_t hl_data_read64(struct file *f, char __user *buf,
size_t count, loff_t *ppos)
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
char tmp_buf[32];
u64 addr = entry->addr;
u64 val;
ssize_t rc;
if (*ppos)
return 0;
if (hl_is_device_va(hdev, addr)) {
rc = device_va_to_pa(hdev, addr, &addr);
if (rc)
return rc;
}
rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val);
if (rc) {
dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
return rc;
}
sprintf(tmp_buf, "0x%016llx\n", val);
return simple_read_from_buffer(buf, count, ppos, tmp_buf,
strlen(tmp_buf));
}
static ssize_t hl_data_write64(struct file *f, const char __user *buf,
size_t count, loff_t *ppos)
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
u64 addr = entry->addr;
u64 value;
ssize_t rc;
rc = kstrtoull_from_user(buf, count, 16, &value);
if (rc)
return rc;
if (hl_is_device_va(hdev, addr)) {
rc = device_va_to_pa(hdev, addr, &addr);
if (rc)
return rc;
}
rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value);
if (rc) {
dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n",
value, addr);
return rc;
}
return count;
}
static ssize_t hl_get_power_state(struct file *f, char __user *buf,
size_t count, loff_t *ppos)
{
......@@ -912,6 +976,12 @@ static const struct file_operations hl_data32b_fops = {
.write = hl_data_write32
};
static const struct file_operations hl_data64b_fops = {
.owner = THIS_MODULE,
.read = hl_data_read64,
.write = hl_data_write64
};
static const struct file_operations hl_i2c_data_fops = {
.owner = THIS_MODULE,
.read = hl_i2c_data_read,
......@@ -1025,6 +1095,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry,
&hl_data32b_fops);
debugfs_create_file("data64",
0644,
dev_entry->root,
dev_entry,
&hl_data64b_fops);
debugfs_create_file("set_power_state",
0200,
dev_entry->root,
......
......@@ -36,7 +36,7 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
status = HL_DEVICE_STATUS_OPERATIONAL;
return status;
};
}
static void hpriv_release(struct kref *ref)
{
......
......@@ -364,8 +364,8 @@ static int goya_etr_validate_address(struct hl_device *hdev, u64 addr,
u64 range_start, range_end;
if (hdev->mmu_enable) {
range_start = prop->va_space_dram_start_address;
range_end = prop->va_space_dram_end_address;
range_start = prop->dmmu.start_addr;
range_end = prop->dmmu.end_addr;
} else {
range_start = prop->dram_user_base_address;
range_end = prop->dram_end_address;
......
......@@ -298,8 +298,8 @@ static ssize_t pm_mng_profile_store(struct device *dev,
/* Make sure we are in LOW PLL when changing modes */
if (hdev->pm_mng_profile == PM_MANUAL) {
hdev->curr_pll_profile = PLL_HIGH;
hl_device_set_frequency(hdev, PLL_LOW);
hdev->pm_mng_profile = PM_AUTO;
hl_device_set_frequency(hdev, PLL_LOW);
}
} else if (strncmp("manual", buf, strlen("manual")) == 0) {
if (hdev->pm_mng_profile == PM_AUTO) {
......
......@@ -132,6 +132,8 @@ enum hl_device_hw_state {
/**
* struct hl_mmu_properties - ASIC specific MMU address translation properties.
* @start_addr: virtual start address of the memory region.
* @end_addr: virtual end address of the memory region.
* @hop0_shift: shift of hop 0 mask.
* @hop1_shift: shift of hop 1 mask.
* @hop2_shift: shift of hop 2 mask.
......@@ -143,9 +145,10 @@ enum hl_device_hw_state {
* @hop3_mask: mask to get the PTE address in hop 3.
* @hop4_mask: mask to get the PTE address in hop 4.
* @page_size: default page size used to allocate memory.
* @huge_page_size: page size used to allocate memory with huge pages.
*/
struct hl_mmu_properties {
u64 start_addr;
u64 end_addr;
u64 hop0_shift;
u64 hop1_shift;
u64 hop2_shift;
......@@ -157,7 +160,6 @@ struct hl_mmu_properties {
u64 hop3_mask;
u64 hop4_mask;
u32 page_size;
u32 huge_page_size;
};
/**
......@@ -169,6 +171,8 @@ struct hl_mmu_properties {
* @preboot_ver: F/W Preboot version.
* @dmmu: DRAM MMU address translation properties.
* @pmmu: PCI (host) MMU address translation properties.
* @pmmu_huge: PCI (host) MMU address translation properties for memory
* allocated with huge pages.
* @sram_base_address: SRAM physical start address.
* @sram_end_address: SRAM physical end address.
* @sram_user_base_address - SRAM physical start address for user access.
......@@ -178,14 +182,6 @@ struct hl_mmu_properties {
* @dram_size: DRAM total size.
* @dram_pci_bar_size: size of PCI bar towards DRAM.
* @max_power_default: max power of the device after reset
* @va_space_host_start_address: base address of virtual memory range for
* mapping host memory.
* @va_space_host_end_address: end address of virtual memory range for
* mapping host memory.
* @va_space_dram_start_address: base address of virtual memory range for
* mapping DRAM memory.
* @va_space_dram_end_address: end address of virtual memory range for
* mapping DRAM memory.
* @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
* fault.
* @pcie_dbi_base_address: Base address of the PCIE_DBI block.
......@@ -218,6 +214,7 @@ struct asic_fixed_properties {
char preboot_ver[VERSION_MAX_LEN];
struct hl_mmu_properties dmmu;
struct hl_mmu_properties pmmu;
struct hl_mmu_properties pmmu_huge;
u64 sram_base_address;
u64 sram_end_address;
u64 sram_user_base_address;
......@@ -227,10 +224,6 @@ struct asic_fixed_properties {
u64 dram_size;
u64 dram_pci_bar_size;
u64 max_power_default;
u64 va_space_host_start_address;
u64 va_space_host_end_address;
u64 va_space_dram_start_address;
u64 va_space_dram_end_address;
u64 dram_size_for_default_page_mapping;
u64 pcie_dbi_base_address;
u64 pcie_aux_dbi_reg_addr;
......@@ -431,10 +424,12 @@ struct hl_eq {
* enum hl_asic_type - supported ASIC types.
* @ASIC_INVALID: Invalid ASIC type.
* @ASIC_GOYA: Goya device.
* @ASIC_GAUDI: Gaudi device.
*/
enum hl_asic_type {
ASIC_INVALID,
ASIC_GOYA
ASIC_GOYA,
ASIC_GAUDI
};
struct hl_cs_parser;
......@@ -589,6 +584,8 @@ struct hl_asic_funcs {
void (*restore_phase_topology)(struct hl_device *hdev);
int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val);
int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val);
int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val);
int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val);
void (*add_device_attr)(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
void (*handle_eqe)(struct hl_device *hdev,
......@@ -658,6 +655,8 @@ struct hl_va_range {
* this hits 0. It is incremented on CS and CS_WAIT.
* @cs_pending: array of DMA fence objects representing pending CS.
* @host_va_range: holds available virtual addresses for host mappings.
* @host_huge_va_range: holds available virtual addresses for host mappings
* with huge pages.
* @dram_va_range: holds available virtual addresses for DRAM mappings.
* @mem_hash_lock: protects the mem_hash.
* @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
......@@ -688,8 +687,9 @@ struct hl_ctx {
struct hl_device *hdev;
struct kref refcount;
struct dma_fence *cs_pending[HL_MAX_PENDING_CS];
struct hl_va_range host_va_range;
struct hl_va_range dram_va_range;
struct hl_va_range *host_va_range;
struct hl_va_range *host_huge_va_range;
struct hl_va_range *dram_va_range;
struct mutex mem_hash_lock;
struct mutex mmu_lock;
struct list_head debugfs_list;
......@@ -763,7 +763,7 @@ struct hl_userptr {
* @aborted: true if CS was aborted due to some device error.
*/
struct hl_cs {
u8 jobs_in_queue_cnt[HL_MAX_QUEUES];
u16 jobs_in_queue_cnt[HL_MAX_QUEUES];
struct hl_ctx *ctx;
struct list_head job_list;
spinlock_t job_lock;
......@@ -1291,6 +1291,8 @@ struct hl_device_idle_busy_ts {
* otherwise.
* @dram_supports_virtual_memory: is MMU enabled towards DRAM.
* @dram_default_page_mapping: is DRAM default page mapping enabled.
* @pmmu_huge_range: is a different virtual address range used for PMMU with
* huge pages.
* @init_done: is the initialization of the device done.
* @mmu_enable: is MMU enabled.
* @device_cpu_disabled: is the device CPU disabled (due to timeouts)
......@@ -1372,6 +1374,7 @@ struct hl_device {
u8 reset_on_lockup;
u8 dram_supports_virtual_memory;
u8 dram_default_page_mapping;
u8 pmmu_huge_range;
u8 init_done;
u8 device_cpu_disabled;
u8 dma_mask;
......@@ -1573,8 +1576,10 @@ int hl_mmu_init(struct hl_device *hdev);
void hl_mmu_fini(struct hl_device *hdev);
int hl_mmu_ctx_init(struct hl_ctx *ctx);
void hl_mmu_ctx_fini(struct hl_ctx *ctx);
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size);
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size);
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
u32 page_size, bool flush_pte);
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
bool flush_pte);
void hl_mmu_swap_out(struct hl_ctx *ctx);
void hl_mmu_swap_in(struct hl_ctx *ctx);
......@@ -1606,11 +1611,18 @@ int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask);
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr);
long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr);
long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr);
long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr);
long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr);
int hl_get_temperature(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
int hl_set_temperature(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
int hl_get_voltage(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
int hl_get_current(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
int hl_get_fan_speed(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
int hl_get_pwm_info(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
long value);
u64 hl_get_max_power(struct hl_device *hdev);
......
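The updated hl_mmu_map()/hl_mmu_unmap() prototypes above take a flush_pte flag so a caller can defer the TLB/PTE flush to the last page of a large area. A minimal sketch of the calling pattern, assuming the new signature; get_phys_for_page() is a hypothetical helper, and real callers hold the MMU lock and unwind partial mappings on failure:

static int map_area_sketch(struct hl_ctx *ctx, u64 va, u64 size,
				u32 page_size)
{
	u64 off;
	int rc;

	for (off = 0 ; off < size ; off += page_size) {
		/* Flush the PTEs only on the final page of the area. */
		bool flush_pte = (off + page_size) >= size;

		rc = hl_mmu_map(ctx, va + off,
				get_phys_for_page(ctx, off) /* hypothetical */,
				page_size, flush_pte);
		if (rc)
			return rc;
	}

	return 0;
}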
......@@ -40,12 +40,13 @@ MODULE_PARM_DESC(reset_on_lockup,
#define PCI_VENDOR_ID_HABANALABS 0x1da3
#define PCI_IDS_GOYA 0x0001
#define PCI_IDS_GAUDI 0x1000
static const struct pci_device_id ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, ids);
/*
* get_asic_type - translate device id to asic type
......@@ -63,6 +64,9 @@ static enum hl_asic_type get_asic_type(u16 device)
case PCI_IDS_GOYA:
asic_type = ASIC_GOYA;
break;
case PCI_IDS_GAUDI:
asic_type = ASIC_GAUDI;
break;
default:
asic_type = ASIC_INVALID;
break;
......@@ -263,6 +267,11 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
dev_err(&pdev->dev, "Unsupported ASIC\n");
rc = -ENODEV;
goto free_hdev;
} else if (hdev->asic_type == ASIC_GAUDI) {
dev_err(&pdev->dev,
"GAUDI is not supported by the current kernel\n");
rc = -ENODEV;
goto free_hdev;
}
} else {
hdev->asic_type = asic_type;
......
......@@ -113,6 +113,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel, long *val)
{
struct hl_device *hdev = dev_get_drvdata(dev);
int rc;
if (hl_device_disabled_or_in_reset(hdev))
return -ENODEV;
......@@ -125,36 +126,40 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
case hwmon_temp_crit:
case hwmon_temp_max_hyst:
case hwmon_temp_crit_hyst:
case hwmon_temp_offset:
case hwmon_temp_highest:
break;
default:
return -EINVAL;
}
*val = hl_get_temperature(hdev, channel, attr);
rc = hl_get_temperature(hdev, channel, attr, val);
break;
case hwmon_in:
switch (attr) {
case hwmon_in_input:
case hwmon_in_min:
case hwmon_in_max:
case hwmon_in_highest:
break;
default:
return -EINVAL;
}
*val = hl_get_voltage(hdev, channel, attr);
rc = hl_get_voltage(hdev, channel, attr, val);
break;
case hwmon_curr:
switch (attr) {
case hwmon_curr_input:
case hwmon_curr_min:
case hwmon_curr_max:
case hwmon_curr_highest:
break;
default:
return -EINVAL;
}
*val = hl_get_current(hdev, channel, attr);
rc = hl_get_current(hdev, channel, attr, val);
break;
case hwmon_fan:
switch (attr) {
......@@ -165,7 +170,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
default:
return -EINVAL;
}
*val = hl_get_fan_speed(hdev, channel, attr);
rc = hl_get_fan_speed(hdev, channel, attr, val);
break;
case hwmon_pwm:
switch (attr) {
......@@ -175,12 +180,12 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
default:
return -EINVAL;
}
*val = hl_get_pwm_info(hdev, channel, attr);
rc = hl_get_pwm_info(hdev, channel, attr, val);
break;
default:
return -EINVAL;
}
return 0;
return rc;
}
static int hl_write(struct device *dev, enum hwmon_sensor_types type,
......@@ -192,6 +197,15 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
return -ENODEV;
switch (type) {
case hwmon_temp:
switch (attr) {
case hwmon_temp_offset:
break;
default:
return -EINVAL;
}
hl_set_temperature(hdev, channel, attr, val);
break;
case hwmon_pwm:
switch (attr) {
case hwmon_pwm_input:
......@@ -219,7 +233,10 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
case hwmon_temp_max_hyst:
case hwmon_temp_crit:
case hwmon_temp_crit_hyst:
case hwmon_temp_highest:
return 0444;
case hwmon_temp_offset:
return 0644;
}
break;
case hwmon_in:
......@@ -227,6 +244,7 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
case hwmon_in_input:
case hwmon_in_min:
case hwmon_in_max:
case hwmon_in_highest:
return 0444;
}
break;
......@@ -235,6 +253,7 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
case hwmon_curr_input:
case hwmon_curr_min:
case hwmon_curr_max:
case hwmon_curr_highest:
return 0444;
}
break;
......@@ -265,10 +284,10 @@ static const struct hwmon_ops hl_hwmon_ops = {
.write = hl_write
};
long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr)
int hl_get_temperature(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
......@@ -279,22 +298,47 @@ long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr)
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
SENSORS_PKT_TIMEOUT, &result);
SENSORS_PKT_TIMEOUT, value);
if (rc) {
dev_err(hdev->dev,
"Failed to get temperature from sensor %d, error %d\n",
sensor_index, rc);
result = 0;
*value = 0;
}
return result;
return rc;
}
long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr)
int hl_set_temperature(struct hl_device *hdev,
int sensor_index, u32 attr, long value)
{
struct armcp_packet pkt;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
pkt.value = __cpu_to_le64(value);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
SENSORS_PKT_TIMEOUT, NULL);
if (rc)
dev_err(hdev->dev,
"Failed to set temperature of sensor %d, error %d\n",
sensor_index, rc);
return rc;
}
int hl_get_voltage(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
......@@ -305,22 +349,22 @@ long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr)
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
SENSORS_PKT_TIMEOUT, &result);
SENSORS_PKT_TIMEOUT, value);
if (rc) {
dev_err(hdev->dev,
"Failed to get voltage from sensor %d, error %d\n",
sensor_index, rc);
result = 0;
*value = 0;
}
return result;
return rc;
}
long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr)
int hl_get_current(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
......@@ -331,22 +375,22 @@ long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr)
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
SENSORS_PKT_TIMEOUT, &result);
SENSORS_PKT_TIMEOUT, value);
if (rc) {
dev_err(hdev->dev,
"Failed to get current from sensor %d, error %d\n",
sensor_index, rc);
result = 0;
*value = 0;
}
return result;
return rc;
}
long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr)
int hl_get_fan_speed(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
......@@ -357,22 +401,22 @@ long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr)
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
SENSORS_PKT_TIMEOUT, &result);
SENSORS_PKT_TIMEOUT, value);
if (rc) {
dev_err(hdev->dev,
"Failed to get fan speed from sensor %d, error %d\n",
sensor_index, rc);
result = 0;
*value = 0;
}
return result;
return rc;
}
long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr)
int hl_get_pwm_info(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
......@@ -383,16 +427,16 @@ long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr)
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
SENSORS_PKT_TIMEOUT, &result);
SENSORS_PKT_TIMEOUT, value);
if (rc) {
dev_err(hdev->dev,
"Failed to get pwm info from sensor %d, error %d\n",
sensor_index, rc);
result = 0;
*value = 0;
}
return result;
return rc;
}
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
......
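With the modified return values, a caller can now tell a transport failure apart from a legitimate zero reading, which was impossible when the getters returned the value directly. A sketch of the new calling convention; the sensor index and the hwmon_temp_input attribute are illustrative:

static void report_temp_sketch(struct hl_device *hdev)
{
	long temp;
	int rc;

	/* Error code and reading now travel on separate channels. */
	rc = hl_get_temperature(hdev, 0, hwmon_temp_input, &temp);
	if (rc)
		dev_warn(hdev->dev, "temperature read failed, error %d\n", rc);
	else
		dev_info(hdev->dev, "temperature: %ld\n", temp);
}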
......@@ -189,6 +189,10 @@ enum pq_init_status {
* ArmCP to write to the structure, to prevent data corruption in case of
* mismatched driver/FW versions.
*
* ARMCP_PACKET_TEMPERATURE_SET -
* Set the value of the offset property of a specified thermal sensor.
* The packet's arguments specify the desired sensor and the field to
* set.
*/
enum armcp_packet_id {
......@@ -214,6 +218,8 @@ enum armcp_packet_id {
ARMCP_PACKET_MAX_POWER_GET, /* sysfs */
ARMCP_PACKET_MAX_POWER_SET, /* sysfs */
ARMCP_PACKET_EEPROM_DATA_GET, /* sysfs */
ARMCP_RESERVED,
ARMCP_PACKET_TEMPERATURE_SET, /* sysfs */
};
#define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5
......@@ -271,24 +277,32 @@ enum armcp_packet_rc {
armcp_packet_fault
};
/*
* armcp_temp_type should adhere to hwmon_temp_attributes
* defined in Linux kernel hwmon.h file
*/
enum armcp_temp_type {
armcp_temp_input,
armcp_temp_max = 6,
armcp_temp_max_hyst,
armcp_temp_crit,
armcp_temp_crit_hyst
armcp_temp_crit_hyst,
armcp_temp_offset = 19,
armcp_temp_highest = 22
};
enum armcp_in_attributes {
armcp_in_input,
armcp_in_min,
armcp_in_max
armcp_in_max,
armcp_in_highest = 7
};
enum armcp_curr_attributes {
armcp_curr_input,
armcp_curr_min,
armcp_curr_max
armcp_curr_max,
armcp_curr_highest = 7
};
enum armcp_fan_attributes {
......
......@@ -188,6 +188,10 @@ enum goya_async_event_id {
GOYA_ASYNC_EVENT_ID_HALT_MACHINE = 485,
GOYA_ASYNC_EVENT_ID_INTS_REGISTER = 486,
GOYA_ASYNC_EVENT_ID_SOFT_RESET = 487,
GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S = 507,
GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E = 508,
GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S = 509,
GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E = 510,
GOYA_ASYNC_EVENT_ID_LAST_VALID_ID = 1023,
GOYA_ASYNC_EVENT_ID_SIZE
};
......
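The goya.c handler that consumes these new event IDs is collapsed in this view. The following fragment is only a sketch of the kind of prints the "add print upon clock change" patch introduces (event_type comes from the surrounding event handler); the exact wording and rate limiting in the real handler may differ:

switch (event_type) {
case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
	dev_info_ratelimited(hdev->dev,
		"Clock throttling due to power consumption\n");
	break;
case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
	dev_info_ratelimited(hdev->dev,
		"Power envelope is safe, back to optimal clock\n");
	break;
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
	dev_info_ratelimited(hdev->dev,
		"Clock throttling due to overheating\n");
	break;
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
	dev_info_ratelimited(hdev->dev,
		"Thermal envelope is safe, back to optimal clock\n");
	break;
}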
......@@ -11,24 +11,27 @@
/*
* PSOC scratch-pad registers
*/
#define mmCPU_PQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
#define mmCPU_PQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
#define mmCPU_EQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
#define mmCPU_EQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_3
#define mmCPU_EQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_4
#define mmCPU_PQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_5
#define mmCPU_EQ_CI mmPSOC_GLOBAL_CONF_SCRATCHPAD_6
#define mmCPU_PQ_INIT_STATUS mmPSOC_GLOBAL_CONF_SCRATCHPAD_7
#define mmCPU_CQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_8
#define mmCPU_CQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_9
#define mmCPU_CQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_10
#define mmUPD_STS mmPSOC_GLOBAL_CONF_SCRATCHPAD_26
#define mmUPD_CMD mmPSOC_GLOBAL_CONF_SCRATCHPAD_27
#define mmPREBOOT_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_28
#define mmUBOOT_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_29
#define mmUBOOT_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_30
#define mmBTL_ID mmPSOC_GLOBAL_CONF_SCRATCHPAD_31
#define mmCPU_PQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
#define mmCPU_PQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
#define mmCPU_EQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
#define mmCPU_EQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_3
#define mmCPU_EQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_4
#define mmCPU_PQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_5
#define mmCPU_EQ_CI mmPSOC_GLOBAL_CONF_SCRATCHPAD_6
#define mmCPU_PQ_INIT_STATUS mmPSOC_GLOBAL_CONF_SCRATCHPAD_7
#define mmCPU_CQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_8
#define mmCPU_CQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_9
#define mmCPU_CQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_10
#define mmCPU_BOOT_ERR0 mmPSOC_GLOBAL_CONF_SCRATCHPAD_24
#define mmCPU_BOOT_ERR1 mmPSOC_GLOBAL_CONF_SCRATCHPAD_25
#define mmUPD_STS mmPSOC_GLOBAL_CONF_SCRATCHPAD_26
#define mmUPD_CMD mmPSOC_GLOBAL_CONF_SCRATCHPAD_27
#define mmPREBOOT_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_28
#define mmUBOOT_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_29
#define mmRDWR_TEST mmPSOC_GLOBAL_CONF_SCRATCHPAD_30
#define mmBTL_ID mmPSOC_GLOBAL_CONF_SCRATCHPAD_31
#define mmHW_STATE mmPSOC_GLOBAL_CONF_APP_STATUS
#define mmHW_STATE mmPSOC_GLOBAL_CONF_APP_STATUS
#define mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS mmPSOC_GLOBAL_CONF_WARM_REBOOT
#endif /* GOYA_REG_MAP_H_ */
......@@ -8,20 +8,35 @@
#ifndef HL_BOOT_IF_H
#define HL_BOOT_IF_H
#define LKD_HARD_RESET_MAGIC 0xED7BD694
/* CPU error bits in BOOT_ERROR registers */
#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << 0)
#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << 1)
#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << 2)
#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << 3)
#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << 4)
#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << 5)
#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << 6)
#define CPU_BOOT_ERR0_ENABLED (1 << 31)
enum cpu_boot_status {
CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */
CPU_BOOT_STATUS_IN_WFE,
CPU_BOOT_STATUS_DRAM_RDY,
CPU_BOOT_STATUS_SRAM_AVAIL,
CPU_BOOT_STATUS_IN_BTL, /* BTL is H/W FSM */
CPU_BOOT_STATUS_IN_PREBOOT,
CPU_BOOT_STATUS_IN_SPL,
CPU_BOOT_STATUS_IN_UBOOT,
CPU_BOOT_STATUS_DRAM_INIT_FAIL,
CPU_BOOT_STATUS_FIT_CORRUPTED,
CPU_BOOT_STATUS_UBOOT_NOT_READY,
CPU_BOOT_STATUS_RESERVED,
CPU_BOOT_STATUS_TS_INIT_FAIL,
CPU_BOOT_STATUS_IN_WFE = 1,
CPU_BOOT_STATUS_DRAM_RDY = 2,
CPU_BOOT_STATUS_SRAM_AVAIL = 3,
CPU_BOOT_STATUS_IN_BTL = 4, /* BTL is H/W FSM */
CPU_BOOT_STATUS_IN_PREBOOT = 5,
CPU_BOOT_STATUS_IN_SPL = 6,
CPU_BOOT_STATUS_IN_UBOOT = 7,
CPU_BOOT_STATUS_DRAM_INIT_FAIL, /* deprecated - will be removed */
CPU_BOOT_STATUS_FIT_CORRUPTED, /* deprecated - will be removed */
CPU_BOOT_STATUS_UBOOT_NOT_READY = 10,
CPU_BOOT_STATUS_NIC_FW_RDY = 11,
CPU_BOOT_STATUS_TS_INIT_FAIL, /* deprecated - will be removed */
CPU_BOOT_STATUS_DRAM_SKIPPED, /* deprecated - will be removed */
CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */
CPU_BOOT_STATUS_READY_TO_BOOT = 15,
};
enum kmd_msg {
......
......@@ -254,6 +254,15 @@ static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
return phys_hop_addr + pte_offset;
}
static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->dmmu.start_addr,
prop->dmmu.end_addr);
}
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
......@@ -548,6 +557,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
curr_pte;
bool is_huge, clear_hop3 = true;
/* shifts and masks are the same in PMMU and HPMMU, use one of them */
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
hop0_addr = get_hop0_addr(ctx);
......@@ -637,29 +647,27 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
clear_hop3 = true;
if (!clear_hop3)
goto flush;
goto mapped;
clear_pte(ctx, hop3_pte_addr);
if (put_pte(ctx, hop3_addr))
goto flush;
goto mapped;
clear_pte(ctx, hop2_pte_addr);
if (put_pte(ctx, hop2_addr))
goto flush;
goto mapped;
clear_pte(ctx, hop1_pte_addr);
if (put_pte(ctx, hop1_addr))
goto flush;
goto mapped;
clear_pte(ctx, hop0_pte_addr);
}
flush:
flush(ctx);
mapped:
return 0;
not_mapped:
......@@ -675,6 +683,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
* @ctx: pointer to the context structure
* @virt_addr: virt addr to map from
* @page_size: size of the page to unmap
* @flush_pte: whether to do a PCI flush
*
* This function does the following:
* - Check that the virt addr is mapped
......@@ -685,40 +694,43 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
* changes the MMU hash, it must be protected by a lock.
* However, because it maps only a single page, the lock should be implemented
* in a higher level in order to protect the entire mapping of the memory area
*
* For optimization reasons PCI flush may be requested once after unmapping of
* large area.
*/
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
bool flush_pte)
{
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop;
u64 real_virt_addr;
u32 real_page_size, npages;
int i, rc;
int i, rc = 0;
bool is_dram_addr;
if (!hdev->mmu_enable)
return 0;
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
is_dram_addr = is_dram_va(hdev, virt_addr);
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
if (is_dram_addr)
mmu_prop = &prop->dmmu;
else if ((page_size % prop->pmmu_huge.page_size) == 0)
mmu_prop = &prop->pmmu_huge;
else
mmu_prop = &prop->pmmu;
/*
* The H/W handles mapping of specific page sizes. Hence if the page
* size is bigger, we break it to sub-pages and unmap them separately.
*/
if ((page_size % mmu_prop->huge_page_size) == 0) {
real_page_size = mmu_prop->huge_page_size;
} else if ((page_size % mmu_prop->page_size) == 0) {
if ((page_size % mmu_prop->page_size) == 0) {
real_page_size = mmu_prop->page_size;
} else {
dev_err(hdev->dev,
"page size of %u is not %uKB nor %uMB aligned, can't unmap\n",
page_size,
mmu_prop->page_size >> 10,
mmu_prop->huge_page_size >> 20);
"page size of %u is not %uKB aligned, can't unmap\n",
page_size, mmu_prop->page_size >> 10);
return -EFAULT;
}
......@@ -729,12 +741,15 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
for (i = 0 ; i < npages ; i++) {
rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
if (rc)
return rc;
break;
real_virt_addr += real_page_size;
}
return 0;
if (flush_pte)
flush(ctx);
return rc;
}
static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
......@@ -753,8 +768,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
hop4_new = false, is_huge;
int rc = -ENOMEM;
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
/*
* This mapping function can map a page or a huge page. For huge page
* there are only 3 hops rather than 4. Currently the DRAM allocation
......@@ -762,11 +775,15 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
* one of the two page sizes. Since this is a common code for all the
* three cases, we need this huge page check.
*/
is_huge = page_size == mmu_prop->huge_page_size;
if (is_dram_addr && !is_huge) {
dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
return -EFAULT;
if (is_dram_addr) {
mmu_prop = &prop->dmmu;
is_huge = true;
} else if (page_size == prop->pmmu_huge.page_size) {
mmu_prop = &prop->pmmu_huge;
is_huge = true;
} else {
mmu_prop = &prop->pmmu;
is_huge = false;
}
hop0_addr = get_hop0_addr(ctx);
......@@ -885,8 +902,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
get_pte(ctx, hop3_addr);
}
flush(ctx);
return 0;
err:
......@@ -909,6 +924,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
* @virt_addr: virt addr to map from
* @phys_addr: phys addr to map to
* @page_size: physical page size
* @flush_pte: whether to do a PCI flush
*
* This function does the following:
* - Check that the virt addr is not mapped
......@@ -919,8 +935,12 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
* changes the MMU hash, it must be protected by a lock.
* However, because it maps only a single page, the lock should be implemented
* in a higher level in order to protect the entire mapping of the memory area
*
* For optimization reasons PCI flush may be requested once after mapping of
* large area.
*/
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
bool flush_pte)
{
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
......@@ -933,26 +953,25 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
if (!hdev->mmu_enable)
return 0;
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
is_dram_addr = is_dram_va(hdev, virt_addr);
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
if (is_dram_addr)
mmu_prop = &prop->dmmu;
else if ((page_size % prop->pmmu_huge.page_size) == 0)
mmu_prop = &prop->pmmu_huge;
else
mmu_prop = &prop->pmmu;
/*
* The H/W handles mapping of specific page sizes. Hence if the page
* size is bigger, we break it to sub-pages and map them separately.
*/
if ((page_size % mmu_prop->huge_page_size) == 0) {
real_page_size = mmu_prop->huge_page_size;
} else if ((page_size % mmu_prop->page_size) == 0) {
if ((page_size % mmu_prop->page_size) == 0) {
real_page_size = mmu_prop->page_size;
} else {
dev_err(hdev->dev,
"page size of %u is not %dKB nor %dMB aligned, can't unmap\n",
page_size,
mmu_prop->page_size >> 10,
mmu_prop->huge_page_size >> 20);
"page size of %u is not %uKB aligned, can't unmap\n",
page_size, mmu_prop->page_size >> 10);
return -EFAULT;
}
......@@ -976,6 +995,9 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
mapped_cnt++;
}
if (flush_pte)
flush(ctx);
return 0;
err:
......@@ -988,6 +1010,8 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
real_virt_addr += real_page_size;
}
flush(ctx);
return rc;
}
......
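On the unmap side the same batching applies: iterate over the area with flush_pte false and flush only once at the end. A short caller-side sketch mirroring what the memory-management code does when tearing down a large user mapping (the function and variable names are illustrative):

static void unmap_area_sketch(struct hl_ctx *ctx, u64 va, u64 unmap_size,
				u32 page_size)
{
	struct hl_device *hdev = ctx->hdev;
	u64 off;

	for (off = 0 ; off < unmap_size ; off += page_size) {
		/* Flush the TLB only after the last page is unmapped. */
		bool flush_pte = (off + page_size) >= unmap_size;

		if (hl_mmu_unmap(ctx, va + off, page_size, flush_pte))
			dev_warn_ratelimited(hdev->dev,
				"unmap failed for va 0x%llx\n", va + off);
	}
}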