Commit aa775edb authored by Daniel Vetter

Merge tag 'drm-habanalabs-next-2024-02-26' of...

Merge tag 'drm-habanalabs-next-2024-02-26' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into drm-next

This tag contains habanalabs driver and accel changes for v6.9.

The notable changes are:

- New features and improvements:
  - Configure interrupt affinity according to NUMA nodes for the MSI-X interrupts that are
    assigned to the userspace application which acquires the device.
  - Move the HBM MMU page tables to reside inside the HBM to minimize latency when doing
    page-walks.
  - Improve the device reset mechanism when consecutive heartbeat failures occur (firmware
    fails to ack on heartbeat message).
  - Check also extended errors in the PCIe addr_dec interrupt information.
  - Rate limit the error messages that can be printed to dmesg log by userspace actions.

- Firmware related fixes:
  - Handle requests from firmware to reserve device memory

- Bug fixes and code cleanups:
  - constify the struct device_type usage in accel (accel_sysfs_device_minor).
  - Fix the PCI health check by reading uncached register.
  - Fix reporting of drain events.
  - Fix debugfs files permissions.
  - Fix calculation of DRAM BAR base address.
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
From: Oded Gabbay <ogabbay@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/ZdxJprop0EniVQtf@ogabbay-vm-u22.habana-labs.com
parents 19b232b9 576d7cc5
......@@ -23,7 +23,7 @@ static struct idr accel_minors_idr;
static struct dentry *accel_debugfs_root;
static struct device_type accel_sysfs_device_minor = {
static const struct device_type accel_sysfs_device_minor = {
.name = "accel_minor"
};
......
......@@ -1360,9 +1360,8 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
return -EINVAL;
}
if (!hl_device_operational(hdev, &status)) {
if (!hl_device_operational(hdev, &status))
return -EBUSY;
}
if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
!hdev->supports_staged_submission) {
......
......@@ -484,7 +484,7 @@ static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
char kbuf[MMU_KBUF_SIZE];
char kbuf[MMU_KBUF_SIZE] = {0};
char *c;
ssize_t rc;
......@@ -546,7 +546,7 @@ static ssize_t mmu_ack_error_value_write(struct file *file,
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
char kbuf[MMU_KBUF_SIZE];
char kbuf[MMU_KBUF_SIZE] = {0};
ssize_t rc;
if (count > sizeof(kbuf) - 1)
......@@ -1643,19 +1643,19 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
&hl_data64b_fops);
debugfs_create_file("set_power_state",
0200,
0644,
root,
dev_entry,
&hl_power_fops);
debugfs_create_file("device",
0200,
0644,
root,
dev_entry,
&hl_device_fops);
debugfs_create_file("clk_gate",
0200,
0644,
root,
dev_entry,
&hl_clk_gate_fops);
......@@ -1667,13 +1667,13 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
&hl_stop_on_err_fops);
debugfs_create_file("dump_security_violations",
0644,
0400,
root,
dev_entry,
&hl_security_violations_fops);
debugfs_create_file("dump_razwi_events",
0644,
0400,
root,
dev_entry,
&hl_razwi_check_fops);
......@@ -1706,7 +1706,7 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
&hdev->reset_info.skip_reset_on_timeout);
debugfs_create_file("state_dump",
0600,
0644,
root,
dev_entry,
&hl_state_dump_fops);
......@@ -1724,7 +1724,7 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
debugfs_create_file(hl_debugfs_list[i].name,
0444,
0644,
root,
entry,
&hl_debugfs_fops);
......
......@@ -55,7 +55,8 @@ static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_regi
if (is_power_of_2(prop->dram_pci_bar_size))
bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);
else
bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) *
bar_base_addr = region->region_base +
div64_u64((addr - region->region_base), prop->dram_pci_bar_size) *
prop->dram_pci_bar_size;
old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
......@@ -1034,14 +1035,14 @@ static void device_early_fini(struct hl_device *hdev)
static bool is_pci_link_healthy(struct hl_device *hdev)
{
u16 vendor_id;
u16 device_id;
if (!hdev->pdev)
return false;
pci_read_config_word(hdev->pdev, PCI_VENDOR_ID, &vendor_id);
pci_read_config_word(hdev->pdev, PCI_DEVICE_ID, &device_id);
return (vendor_id == PCI_VENDOR_ID_HABANALABS);
return (device_id == hdev->pdev->device);
}
static int hl_device_eq_heartbeat_check(struct hl_device *hdev)
......@@ -1768,14 +1769,16 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
hdev->device_cpu_disabled = false;
hdev->reset_info.hard_reset_pending = false;
/*
* Put the device in an unusable state if there are 2 back to back resets due to
* fatal errors.
*/
if (hdev->reset_info.reset_trigger_repeated &&
(hdev->reset_info.prev_reset_trigger ==
HL_DRV_RESET_FW_FATAL_ERR)) {
/* if there 2 back to back resets from FW,
* ensure driver puts the driver in a unusable state
*/
(hdev->reset_info.prev_reset_trigger == HL_DRV_RESET_FW_FATAL_ERR ||
hdev->reset_info.prev_reset_trigger ==
HL_DRV_RESET_HEARTBEAT)) {
dev_crit(hdev->dev,
"%s Consecutive FW fatal errors received, stopping hard reset\n",
"%s Consecutive fatal errors, stopping hard reset\n",
dev_name(&(hdev)->pdev->dev));
rc = -EIO;
goto out_err;
......@@ -2801,3 +2804,35 @@ void hl_enable_err_info_capture(struct hl_error_info *captured_err_info)
atomic_set(&captured_err_info->cs_timeout.write_enable, 1);
captured_err_info->undef_opcode.write_enable = true;
}
/**
 * hl_init_cpu_for_irq() - populate the device IRQ affinity mask.
 * @hdev: pointer to habanalabs device structure.
 *
 * With CONFIG_NUMA enabled, intersect the CPUs of the PCI device's NUMA node
 * with the online CPUs and, for every CPU in the result, set the first CPU of
 * its sibling mask in hdev->irq_affinity_mask. HT siblings therefore collapse
 * into a single entry. Without CONFIG_NUMA the function does nothing.
 */
void hl_init_cpu_for_irq(struct hl_device *hdev)
{
#ifdef CONFIG_NUMA
	/*
	 * Scratch mask with static storage, i.e. shared by every call.
	 * NOTE(review): presumably static to keep a large cpumask off the
	 * stack; confirm that callers are serialized.
	 */
	static struct cpumask node_online_cpus;
	struct cpumask *affinity = &hdev->irq_affinity_mask;
	int node = hdev->pdev->dev.numa_node;
	unsigned int first_sibling;
	int cpu;

	/* A negative node id gives nothing to derive a mask from */
	if (node < 0)
		return;

	if (!cpumask_and(&node_online_cpus, cpumask_of_node(node), cpu_online_mask)) {
		dev_err(hdev->dev, "No available affinities in current numa node\n");
		return;
	}

	/* Remove HT siblings */
	for_each_cpu(cpu, &node_online_cpus) {
		first_sibling = cpumask_first(topology_sibling_cpumask(cpu));
		cpumask_set_cpu(first_sibling, affinity);
	}
#endif
}
/**
 * hl_set_irq_affinity() - apply the device affinity mask to an interrupt.
 * @hdev: pointer to habanalabs device structure.
 * @irq: interrupt number to configure.
 *
 * Does nothing (apart from a debug print) when hdev->irq_affinity_mask is
 * empty. A failure to set the affinity is logged and otherwise ignored.
 */
void hl_set_irq_affinity(struct hl_device *hdev, int irq)
{
	const struct cpumask *affinity = &hdev->irq_affinity_mask;
	int rc;

	if (cpumask_empty(affinity)) {
		dev_dbg(hdev->dev, "affinity mask is empty\n");
		return;
	}

	rc = irq_set_affinity_and_hint(irq, affinity);
	if (rc)
		dev_err(hdev->dev, "Failed setting irq %d affinity\n", irq);
}
......@@ -501,7 +501,7 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
0, &result);
if (rc)
dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
dev_err(hdev->dev, "failed to unmask event %d", event_type);
return rc;
}
......@@ -540,7 +540,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
total_pkt_size, 0, &result);
if (rc)
dev_err(hdev->dev, "failed to unmask IRQ array\n");
dev_err(hdev->dev, "failed to unmask event array\n");
kfree(pkt);
......@@ -2718,18 +2718,20 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
}
rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, sizeof(struct lkd_msg_comms));
if (rc)
goto protocol_err;
if (hdev->asic_prop.support_dynamic_resereved_fw_size)
hdev->asic_prop.reserved_fw_mem_size =
le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb) * SZ_1M;
if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
struct lkd_fw_binning_info *binning_info;
rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
sizeof(struct lkd_msg_comms));
if (rc)
goto protocol_err;
/* read preboot version */
rc = hl_fw_dynamic_read_device_fw_version(hdev, FW_COMP_PREBOOT,
fw_loader->dynamic_loader.comm_desc.cur_fw_ver);
if (rc)
return rc;
......@@ -2756,11 +2758,6 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
hdev->decoder_binning, hdev->rotator_binning);
}
if (hdev->asic_prop.support_dynamic_resereved_fw_size) {
hdev->asic_prop.reserved_fw_mem_size =
le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb);
}
return 0;
}
......@@ -2795,7 +2792,7 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
hdev->asic_funcs->init_cpu_scrambler_dram(hdev);
if (!(hdev->fw_components & FW_TYPE_LINUX)) {
dev_info(hdev->dev, "Skip loading Linux F/W\n");
dev_dbg(hdev->dev, "Skip loading Linux F/W\n");
return 0;
}
......
......@@ -443,18 +443,22 @@ enum hl_collective_mode {
* a CB handle can be provided for jobs on this queue.
* Otherwise, a CB address must be provided.
* @collective_mode: collective mode of current queue
* @q_dram_bd_address: PQ dram address, used when PQ need to reside in DRAM.
* @driver_only: true if only the driver is allowed to send a job to this queue,
* false otherwise.
* @binned: True if the queue is binned out and should not be used
* @supports_sync_stream: True if queue supports sync stream
* @dram_bd: True if the bd should be copied to dram, needed for PQ which has been allocated on dram
*/
struct hw_queue_properties {
	enum hl_queue_type type;
	enum queue_cb_alloc_flags cb_alloc_flags;
	enum hl_collective_mode collective_mode;
	/* PQ dram address, used when the PQ needs to reside in DRAM */
	u64 q_dram_bd_address;
	/* The u8 members below are used as boolean flags */
	u8 driver_only;
	u8 binned;
	u8 supports_sync_stream;
	/* True if the bd should also be copied to dram (PQ allocated on dram) */
	u8 dram_bd;
};
/**
......@@ -590,8 +594,6 @@ struct hl_hints_range {
* we display to the user
* @mmu_pgt_size: MMU page tables total size.
* @mmu_pte_size: PTE size in MMU page tables.
* @mmu_hop_table_size: MMU hop table size.
* @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
* @dram_page_size: The DRAM physical page size.
* @cfg_size: configuration space size on SRAM.
* @sram_size: total size of SRAM.
......@@ -645,10 +647,10 @@ struct hl_hints_range {
* @num_engine_cores: number of engine cpu cores.
* @max_num_of_engines: maximum number of all engines in the ASIC.
* @num_of_special_blocks: special_blocks array size.
* @glbl_err_cause_num: global err cause number.
* @glbl_err_max_cause_num: global err max cause number.
* @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
* not supported.
* @reserved_fw_mem_size: size in MB of dram memory reserved for FW.
* @reserved_fw_mem_size: size of dram memory reserved for FW.
* @collective_first_sob: first sync object available for collective use
* @collective_first_mon: first monitor available for collective use
* @sync_stream_first_sob: first sync object available for sync stream use
......@@ -743,8 +745,6 @@ struct asic_fixed_properties {
u32 clk_pll_index;
u32 mmu_pgt_size;
u32 mmu_pte_size;
u32 mmu_hop_table_size;
u32 mmu_hop0_tables_total_size;
u32 dram_page_size;
u32 cfg_size;
u32 sram_size;
......@@ -779,7 +779,7 @@ struct asic_fixed_properties {
u32 num_engine_cores;
u32 max_num_of_engines;
u32 num_of_special_blocks;
u32 glbl_err_cause_num;
u32 glbl_err_max_cause_num;
u32 hbw_flush_reg;
u32 reserved_fw_mem_size;
u16 collective_first_sob;
......@@ -1052,6 +1052,8 @@ struct hl_encaps_signals_mgr {
* @collective_mode: collective mode of current queue
* @kernel_address: holds the queue's kernel virtual address.
* @bus_address: holds the queue's DMA address.
* @pq_dram_address: holds the dram address when the PQ is allocated, used when dram_bd is true in
* queue properties.
* @pi: holds the queue's pi value.
* @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci).
* @hw_queue_id: the id of the H/W queue.
......@@ -1061,6 +1063,7 @@ struct hl_encaps_signals_mgr {
* @valid: is the queue valid (we have array of 32 queues, not all of them
* exist).
* @supports_sync_stream: True if queue supports sync stream
* @dram_bd: True if the bd should be copied to dram, needed for PQ which has been allocated on dram
*/
struct hl_hw_queue {
struct hl_cs_job **shadow_queue;
......@@ -1069,6 +1072,7 @@ struct hl_hw_queue {
enum hl_collective_mode collective_mode;
void *kernel_address;
dma_addr_t bus_address;
u64 pq_dram_address;
u32 pi;
atomic_t ci;
u32 hw_queue_id;
......@@ -1077,6 +1081,7 @@ struct hl_hw_queue {
u16 int_queue_len;
u8 valid;
u8 supports_sync_stream;
u8 dram_bd;
};
/**
......@@ -2547,7 +2552,7 @@ struct hl_state_dump_specs {
* DEVICES
*/
#define HL_STR_MAX 32
#define HL_STR_MAX 64
#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1)
......@@ -3257,6 +3262,7 @@ struct hl_reset_info {
* @clk_throttling: holds information about current/previous clock throttling events
* @captured_err_info: holds information about errors.
* @reset_info: holds current device reset information.
* @irq_affinity_mask: mask of available CPU cores for user and decoder interrupt handling.
* @stream_master_qid_arr: pointer to array with QIDs of master streams.
* @fw_inner_major_ver: the major of current loaded preboot inner version.
* @fw_inner_minor_ver: the minor of current loaded preboot inner version.
......@@ -3446,6 +3452,8 @@ struct hl_device {
struct hl_reset_info reset_info;
cpumask_t irq_affinity_mask;
u32 *stream_master_qid_arr;
u32 fw_inner_major_ver;
u32 fw_inner_minor_ver;
......@@ -3886,6 +3894,7 @@ int hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_
struct hl_hr_mmu_funcs *hr_func);
int hl_mmu_if_set_funcs(struct hl_device *hdev);
void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
void hl_mmu_v2_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
void hl_mmu_v2_hr_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr);
int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
......@@ -3893,6 +3902,22 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
u64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr);
u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr);
bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr);
struct pgt_info *hl_mmu_dr_get_pgt_info(struct hl_ctx *ctx, u64 hop_addr);
void hl_mmu_dr_free_hop(struct hl_ctx *ctx, u64 hop_addr);
void hl_mmu_dr_free_pgt_node(struct hl_ctx *ctx, struct pgt_info *pgt_info);
u64 hl_mmu_dr_get_phys_hop0_addr(struct hl_ctx *ctx);
u64 hl_mmu_dr_get_hop0_addr(struct hl_ctx *ctx);
void hl_mmu_dr_write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val);
void hl_mmu_dr_write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val);
void hl_mmu_dr_clear_pte(struct hl_ctx *ctx, u64 pte_addr);
u64 hl_mmu_dr_get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
void hl_mmu_dr_get_pte(struct hl_ctx *ctx, u64 hop_addr);
int hl_mmu_dr_put_pte(struct hl_ctx *ctx, u64 hop_addr);
u64 hl_mmu_dr_get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop);
u64 hl_mmu_dr_alloc_hop(struct hl_ctx *ctx);
void hl_mmu_dr_flush(struct hl_ctx *ctx);
int hl_mmu_dr_init(struct hl_device *hdev);
void hl_mmu_dr_fini(struct hl_device *hdev);
int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
void __iomem *dst, u32 src_offset, u32 size);
......@@ -4032,6 +4057,8 @@ void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_
void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info);
void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count);
void hl_enable_err_info_capture(struct hl_error_info *captured_err_info);
void hl_init_cpu_for_irq(struct hl_device *hdev);
void hl_set_irq_affinity(struct hl_device *hdev, int irq);
#ifdef CONFIG_DEBUG_FS
......
......@@ -84,6 +84,8 @@ void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
u32 ctl, u32 len, u64 ptr)
{
struct hl_bd *bd;
u64 addr;
int i;
bd = q->kernel_address;
bd += hl_pi_2_offset(q->pi);
......@@ -91,7 +93,16 @@ void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
bd->len = cpu_to_le32(len);
bd->ptr = cpu_to_le64(ptr);
if (q->dram_bd)
for (i = 0 ; i < 2 ; i++) {
addr = q->pq_dram_address +
((hl_pi_2_offset(q->pi) * sizeof(struct hl_bd)) + (i * sizeof(u64)));
hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM, addr,
(u64 *)(bd) + i, DEBUGFS_WRITE64);
}
q->pi = hl_queue_inc_ptr(q->pi);
hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}
......@@ -1087,12 +1098,18 @@ int hl_hw_queues_create(struct hl_device *hdev)
q->supports_sync_stream =
asic->hw_queues_props[i].supports_sync_stream;
q->collective_mode = asic->hw_queues_props[i].collective_mode;
q->dram_bd = asic->hw_queues_props[i].dram_bd;
rc = queue_init(hdev, q, i);
if (rc) {
dev_err(hdev->dev,
"failed to initialize queue %d\n", i);
goto release_queues;
}
/* Set DRAM PQ address for the queue if it should be at DRAM */
if (q->dram_bd)
q->pq_dram_address = asic->hw_queues_props[i].q_dram_bd_address;
}
return 0;
......
......@@ -46,7 +46,7 @@ static u32 fixup_flags_legacy_fw(struct hl_device *hdev, enum hwmon_sensor_types
break;
default:
dev_err(hdev->dev, "unsupported h/w sensor type %d\n", type);
dev_err_ratelimited(hdev->dev, "unsupported h/w sensor type %d\n", type);
flags = cpucp_flags;
break;
}
......@@ -134,7 +134,7 @@ static u32 adjust_hwmon_flags(struct hl_device *hdev, enum hwmon_sensor_types ty
break;
default:
dev_err(hdev->dev, "unsupported h/w sensor type %d\n", type);
dev_err_ratelimited(hdev->dev, "unsupported h/w sensor type %d\n", type);
flags = cpucp_flags;
break;
}
......@@ -162,7 +162,8 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sen
break;
if (type >= HWMON_NR_SENSOR_TYPES) {
dev_err(hdev->dev, "Got wrong sensor type %d from device\n", type);
dev_err_ratelimited(hdev->dev,
"Got wrong sensor type %d from device\n", type);
return -EINVAL;
}
......@@ -584,7 +585,7 @@ int hl_get_temperature(struct hl_device *hdev,
*value = (long) result;
if (rc) {
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to get temperature from sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
......@@ -611,7 +612,7 @@ int hl_set_temperature(struct hl_device *hdev,
0, NULL);
if (rc)
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to set temperature of sensor %d, error %d\n",
sensor_index, rc);
......@@ -638,7 +639,7 @@ int hl_get_voltage(struct hl_device *hdev,
*value = (long) result;
if (rc) {
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to get voltage from sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
......@@ -667,7 +668,7 @@ int hl_get_current(struct hl_device *hdev,
*value = (long) result;
if (rc) {
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to get current from sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
......@@ -696,7 +697,7 @@ int hl_get_fan_speed(struct hl_device *hdev,
*value = (long) result;
if (rc) {
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to get fan speed from sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
......@@ -725,7 +726,7 @@ int hl_get_pwm_info(struct hl_device *hdev,
*value = (long) result;
if (rc) {
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to get pwm info from sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
......@@ -752,7 +753,7 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
0, NULL);
if (rc)
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to set pwm info to sensor %d, error %d\n",
sensor_index, rc);
}
......@@ -775,7 +776,7 @@ int hl_set_voltage(struct hl_device *hdev,
0, NULL);
if (rc)
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to set voltage of sensor %d, error %d\n",
sensor_index, rc);
......@@ -800,7 +801,7 @@ int hl_set_current(struct hl_device *hdev,
0, NULL);
if (rc)
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to set current of sensor %d, error %d\n",
sensor_index, rc);
......@@ -831,7 +832,7 @@ int hl_set_power(struct hl_device *hdev,
0, NULL);
if (rc)
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to set power of sensor %d, error %d\n",
sensor_index, rc);
......@@ -858,7 +859,7 @@ int hl_get_power(struct hl_device *hdev,
*value = (long) result;
if (rc) {
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to get power of sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
......
# SPDX-License-Identifier: GPL-2.0-only
HL_COMMON_MMU_FILES := common/mmu/mmu.o common/mmu/mmu_v1.o \
common/mmu/mmu_v2_hr.o
common/mmu/mmu_v2.o common/mmu/mmu_v2_hr.o
......@@ -585,6 +585,8 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
int hl_mmu_if_set_funcs(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
if (hdev->mmu_disable)
return 0;
......@@ -597,8 +599,9 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
case ASIC_GAUDI2:
case ASIC_GAUDI2B:
case ASIC_GAUDI2C:
/* MMUs in Gaudi2 are always host resident */
hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
hl_mmu_v2_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
if (prop->pmmu.host_resident)
hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
break;
default:
dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
......@@ -1209,3 +1212,219 @@ int hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_
return 0;
}
/**
 * hl_mmu_dr_get_pgt_info() - look up the bookkeeping node of a hop.
 * @ctx: context whose shadow hash is searched.
 * @hop_addr: shadow address of the hop.
 *
 * Return: the pgt_info whose shadow_addr equals @hop_addr, or NULL when the
 * context's shadow hash holds no such entry.
 */
struct pgt_info *hl_mmu_dr_get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *candidate;

	hash_for_each_possible(ctx->mmu_shadow_hash, candidate, node,
				(unsigned long) hop_addr) {
		if (candidate->shadow_addr == hop_addr)
			return candidate;
	}

	return NULL;
}
/**
 * hl_mmu_dr_free_hop() - release a hop identified by its shadow address.
 * @ctx: context owning the hop.
 * @hop_addr: shadow address of the hop.
 *
 * The lookup result is handed to hl_mmu_dr_free_pgt_node() unchecked, so
 * @hop_addr must refer to a hop that is present in the context's shadow hash.
 */
void hl_mmu_dr_free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
	hl_mmu_dr_free_pgt_node(ctx, hl_mmu_dr_get_pgt_info(ctx, hop_addr));
}
/**
 * hl_mmu_dr_free_pgt_node() - release all resources of a hop.
 * @ctx: context owning the hop.
 * @pgt_info: bookkeeping node of the hop; freed by this function.
 *
 * Returns the hop's physical table to the page-table pool, unlinks the node
 * from the hash, and frees both the shadow table and the node itself.
 */
void hl_mmu_dr_free_pgt_node(struct hl_ctx *ctx, struct pgt_info *pgt_info)
{
	struct hl_device *hdev = ctx->hdev;
	void *shadow_table;

	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr,
			hdev->asic_prop.dmmu.hop_table_size);

	hash_del(&pgt_info->node);

	/* shadow_addr stores a kernel pointer as an integer */
	shadow_table = (u64 *) (uintptr_t) pgt_info->shadow_addr;
	kfree(shadow_table);

	kfree(pgt_info);
}
u64 hl_mmu_dr_get_phys_hop0_addr(struct hl_ctx *ctx)
{
return ctx->hdev->asic_prop.mmu_pgt_addr +
(ctx->asid * ctx->hdev->asic_prop.dmmu.hop_table_size);
}
u64 hl_mmu_dr_get_hop0_addr(struct hl_ctx *ctx)
{
return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 +
(ctx->asid * ctx->hdev->asic_prop.dmmu.hop_table_size);
}
/**
 * hl_mmu_dr_get_phys_addr() - translate a shadow address to a physical one.
 * @ctx: context owning the page tables.
 * @shadow_addr: address inside a shadow hop table.
 *
 * The address is split, using (hop table size - 1) as a mask, into the hop
 * base and the offset within the hop. The base is translated and the offset
 * is added back unchanged. For any hop other than hop0 the pgt_info lookup
 * result is dereferenced unchecked, so the hop must exist in the shadow hash.
 *
 * Return: the physical address matching @shadow_addr.
 */
u64 hl_mmu_dr_get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
	u64 offset_mask = ctx->hdev->asic_prop.dmmu.hop_table_size - 1;
	u64 shadow_base = shadow_addr & (~offset_mask);
	u64 offset_in_hop = shadow_addr & offset_mask;

	if (shadow_base == hl_mmu_dr_get_hop0_addr(ctx))
		return hl_mmu_dr_get_phys_hop0_addr(ctx) + offset_in_hop;

	return hl_mmu_dr_get_pgt_info(ctx, shadow_base)->phys_addr + offset_in_hop;
}
/**
 * hl_mmu_dr_write_pte() - write a PTE that points at another hop.
 * @ctx: context owning the page tables.
 * @shadow_pte_addr: shadow address of the PTE to update.
 * @val: PTE value expressed with a shadow hop address.
 *
 * Both the PTE location and @val are translated through
 * hl_mmu_dr_get_phys_addr() before being written to the device. The shadow
 * copy keeps the untranslated @val.
 */
void hl_mmu_dr_write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
	struct hl_device *hdev = ctx->hdev;
	u64 phys_val = hl_mmu_dr_get_phys_addr(ctx, val);
	u64 phys_pte_addr = hl_mmu_dr_get_phys_addr(ctx, shadow_pte_addr);
	u64 *shadow_pte = (u64 *) (uintptr_t) shadow_pte_addr;

	hdev->asic_funcs->write_pte(hdev, phys_pte_addr, phys_val);

	*shadow_pte = val;
}
/**
 * hl_mmu_dr_write_final_pte() - write a PTE whose value needs no translation.
 * @ctx: context owning the page tables.
 * @shadow_pte_addr: shadow address of the PTE to update.
 * @val: value written as-is to both the device PTE and its shadow copy.
 *
 * Only the PTE location is translated to a physical address.
 */
void hl_mmu_dr_write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
	struct hl_device *hdev = ctx->hdev;
	u64 phys_pte_addr = hl_mmu_dr_get_phys_addr(ctx, shadow_pte_addr);
	u64 *shadow_pte = (u64 *) (uintptr_t) shadow_pte_addr;

	hdev->asic_funcs->write_pte(hdev, phys_pte_addr, val);

	*shadow_pte = val;
}
/**
 * hl_mmu_dr_clear_pte() - zero a PTE on the device and in its shadow copy.
 * @ctx: context owning the page tables.
 * @pte_addr: shadow address of the PTE to clear.
 */
void hl_mmu_dr_clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
	const u64 cleared_pte = 0;

	hl_mmu_dr_write_final_pte(ctx, pte_addr, cleared_pte);
}
/**
 * hl_mmu_dr_get_pte() - account for one more PTE in use inside a hop.
 * @ctx: context owning the hop.
 * @hop_addr: shadow address of the hop.
 *
 * The lookup result is dereferenced unchecked, so the hop must exist in the
 * context's shadow hash.
 */
void hl_mmu_dr_get_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *hop_info = hl_mmu_dr_get_pgt_info(ctx, hop_addr);

	hop_info->num_of_ptes++;
}
/**
 * hl_mmu_dr_put_pte() - drop one PTE reference from a hop.
 * @ctx: context owning the hop.
 * @hop_addr: shadow address of the hop.
 *
 * Frees the hop once its PTE count reaches zero. The lookup result is
 * dereferenced unchecked, so the hop must exist in the shadow hash.
 *
 * Return: number of PTEs still in use in the hop after the decrement.
 */
int hl_mmu_dr_put_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *hop_info = hl_mmu_dr_get_pgt_info(ctx, hop_addr);
	int remaining;

	/*
	 * Keep the count in a local: hl_mmu_dr_free_pgt_node() frees hop_info,
	 * so it must not be read again afterwards.
	 */
	remaining = --hop_info->num_of_ptes;

	if (remaining == 0)
		hl_mmu_dr_free_pgt_node(ctx, hop_info);

	return remaining;
}
/**
 * hl_mmu_dr_alloc_hop() - allocate a new hop for a context.
 * @ctx: context that will own the hop.
 *
 * Allocates the bookkeeping node, a physical hop table from the page-table
 * pool and a zeroed shadow table, then registers the node in the context's
 * shadow hash keyed by the shadow address.
 *
 * Return: shadow address of the new hop, or ULLONG_MAX on allocation failure.
 */
u64 hl_mmu_dr_alloc_hop(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pgt_info *new_hop;
	u64 phys_addr, shadow_addr;

	new_hop = kmalloc(sizeof(*new_hop), GFP_KERNEL);
	if (!new_hop)
		return ULLONG_MAX;

	phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool,
						prop->dmmu.hop_table_size);
	if (!phys_addr) {
		dev_err(hdev->dev, "failed to allocate page\n");
		goto free_node;
	}

	shadow_addr = (u64) (uintptr_t) kzalloc(prop->dmmu.hop_table_size, GFP_KERNEL);
	if (!shadow_addr)
		goto free_phys_table;

	new_hop->num_of_ptes = 0;
	new_hop->ctx = ctx;
	new_hop->shadow_addr = shadow_addr;
	new_hop->phys_addr = phys_addr;
	hash_add(ctx->mmu_shadow_hash, &new_hop->node, shadow_addr);

	return shadow_addr;

free_phys_table:
	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr,
			prop->dmmu.hop_table_size);
free_node:
	kfree(new_hop);

	return ULLONG_MAX;
}
/**
 * hl_mmu_dr_get_alloc_next_hop_addr() - get the next hop, allocating if needed.
 * @ctx: context owning the page tables.
 * @curr_pte: PTE value from which the next hop address is derived.
 * @is_new_hop: written only when an allocation was attempted; true if it
 *              succeeded. Left untouched when the next hop already exists.
 *
 * Return: address of the next hop, or ULLONG_MAX if it had to be allocated
 * and the allocation failed.
 */
u64 hl_mmu_dr_get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop)
{
	u64 next_hop = hl_mmu_get_next_hop_addr(ctx, curr_pte);

	if (next_hop != ULLONG_MAX)
		return next_hop;

	next_hop = hl_mmu_dr_alloc_hop(ctx);
	*is_new_hop = (next_hop != ULLONG_MAX);

	return next_hop;
}
void hl_mmu_dr_flush(struct hl_ctx *ctx)
{
/* flush all writes from all cores to reach PCI */
mb();
ctx->hdev->asic_funcs->read_pte(ctx->hdev, hl_mmu_dr_get_phys_hop0_addr(ctx));
}
int hl_mmu_dr_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc;
hdev->mmu_priv.dr.mmu_pgt_pool =
gen_pool_create(__ffs(prop->dmmu.hop_table_size), -1);
if (!hdev->mmu_priv.dr.mmu_pgt_pool) {
dev_err(hdev->dev, "Failed to create page gen pool\n");
return -ENOMEM;
}
rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr +
prop->dmmu.hop0_tables_total_size,
prop->dmmu.pgt_size - prop->dmmu.hop0_tables_total_size,
-1);
if (rc) {
dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
goto err_pool_add;
}
hdev->mmu_priv.dr.mmu_shadow_hop0 = kvcalloc(prop->max_asid,
prop->dmmu.hop_table_size, GFP_KERNEL);
if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
rc = -ENOMEM;
goto err_pool_add;
}
/* MMU H/W init will be done in device hw_init() */
return 0;
err_pool_add:
gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
return rc;
}
/**
 * hl_mmu_dr_fini() - release what hl_mmu_dr_init() allocated.
 * @hdev: pointer to habanalabs device structure.
 *
 * A no-op when the hop0 shadow buffer is not allocated, which makes repeated
 * calls safe.
 */
void hl_mmu_dr_fini(struct hl_device *hdev)
{
	/* MMU H/W fini was already done in device hw_fini() */

	if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
		kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
		gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);

		/*
		 * Reset the pointer so that reaching here again without a new
		 * init (e.g. after a failure at certain points of the hard
		 * reset flow) does not cause a kernel panic.
		 */
		hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
	}
}
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2020 HabanaLabs, Ltd.
* All Rights Reserved.
*/
#include "../habanalabs.h"
#include "../../include/hw_ip/mmu/mmu_general.h"
#include "../../include/hw_ip/mmu/mmu_v2_0.h"
#include <linux/slab.h>
/**
 * hl_mmu_v2_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize the hash that holds all page-table hops related to this context.
 * No other per-context state is set up here.
 *
 * Return: always 0.
 */
static int hl_mmu_v2_ctx_init(struct hl_ctx *ctx)
{
	hash_init(ctx->mmu_shadow_hash);
	return 0;
}
/*
 * hl_mmu_v2_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * Reports an error if the context still owns page-table hops, then frees
 * every hop that is left in the context's shadow hash.
 */
static void hl_mmu_v2_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *leftover;
	struct hlist_node *next;
	int bkt;

	if (!hash_empty(ctx->mmu_shadow_hash))
		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
			ctx->asid);

	/* Safe iteration: each node is unlinked and freed inside the loop */
	hash_for_each_safe(ctx->mmu_shadow_hash, bkt, next, leftover, node) {
		dev_err_ratelimited(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			leftover->phys_addr, ctx->asid, leftover->num_of_ptes);
		hl_mmu_dr_free_pgt_node(ctx, leftover);
	}
}
/*
 * hl_mmu_v2_unmap - remove the mapping of a DRAM virtual address
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virtual address to unmap
 * @is_dram_addr: must be true; any other address type is rejected
 *
 * Walks the shadow page tables from hop0 until a PTE carrying the "last" bit
 * is found, then clears PTEs from that hop backwards, releasing every hop
 * that becomes empty along the way.
 *
 * Return: 0 on success, -EINVAL if @is_dram_addr is false or the address is
 * not mapped, -EFAULT if a PTE address cannot be computed or no "last" PTE
 * was found during the walk.
 */
static int hl_mmu_v2_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
{
	u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 }, curr_pte,
	scrambled_virt_addr;
	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
	struct hl_device *hdev = ctx->hdev;
	struct hl_mmu_properties *mmu_prop;
	bool is_huge = false;
	int i, hop_last;
	/* device resident in V2 are allowed only for HMMU */
	if (!is_dram_addr)
		return -EINVAL;
	mmu_prop = &prop->dmmu;
	hop_last = mmu_prop->num_hops - 1;
	scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
	/* The walk runs on the shadow tables, so PTE addresses are dereferenced directly */
	hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx);
	hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
	hop_addr[0], scrambled_virt_addr);
	if (hop_pte_addr[0] == U64_MAX)
		return -EFAULT;
	curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0];
	for (i = 1 ; i < mmu_prop->num_hops ; i++) {
		hop_addr[i] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
		if (hop_addr[i] == ULLONG_MAX)
			goto not_mapped;
		hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
		hop_addr[i], scrambled_virt_addr);
		if (hop_pte_addr[i] == U64_MAX)
			return -EFAULT;
		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i];
		/* A PTE with the last bit set ends the walk; remember at which hop */
		if ((i <= hop_last) && (curr_pte & mmu_prop->last_mask)) {
			hop_last = i;
			is_huge = true;
			break;
		}
	}
	/* is_dram_addr is always true here, so this fires whenever no last PTE was found */
	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}
	if (!(curr_pte & PAGE_PRESENT_MASK))
		goto not_mapped;
	/*
	 * Clear from the last hop backwards. Stop as soon as a hop still holds
	 * other PTEs, since its parent entry must then stay in place.
	 */
	for (i = hop_last ; i > 0 ; i--) {
		hl_mmu_dr_clear_pte(ctx, hop_pte_addr[i]);
		if (hl_mmu_dr_put_pte(ctx, hop_addr[i]))
			goto mapped;
	}
	/* Every hop above hop0 was released, so drop the hop0 entry as well */
	hl_mmu_dr_clear_pte(ctx, hop_pte_addr[0]);
mapped:
	return 0;
not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
	virt_addr);
	return -EINVAL;
}
/*
 * hl_mmu_v2_map - map a DRAM virtual address to a physical address
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virtual address to map
 * @phys_addr: physical address to map to
 * @page_size: not used by this implementation
 * @is_dram_addr: must be true; any other address type is rejected
 *
 * Walks the shadow page tables from hop0 to the last hop, allocating missing
 * hops on the way, writes the final PTE and then links every newly allocated
 * hop into its parent. On failure, hops allocated by this call are freed.
 *
 * Return: 0 on success, -EINVAL if @is_dram_addr is false, a PTE address
 * cannot be computed or the address is already mapped, -ENOMEM if a hop
 * cannot be allocated.
 */
static int hl_mmu_v2_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
				u32 page_size, bool is_dram_addr)
{
	u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 },
		curr_pte = 0, scrambled_virt_addr, scrambled_phys_addr;
	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
	bool hop_new[MMU_ARCH_6_HOPS] = { false };
	struct hl_device *hdev = ctx->hdev;
	struct hl_mmu_properties *mmu_prop;
	int rc, i, hop_last;

	/* device resident in V2 are allowed only for HMMU */
	if (!is_dram_addr)
		return -EINVAL;

	mmu_prop = &prop->dmmu;

	hop_last = mmu_prop->num_hops - 1;

	scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
	scrambled_phys_addr = hdev->asic_funcs->scramble_addr(hdev, phys_addr);

	/* First hop is preallocated therefore it is treated differently */
	hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx);
	hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
						hop_addr[0], scrambled_virt_addr);
	/*
	 * Reject the error sentinel before dereferencing, as done for the
	 * following hops. Nothing was allocated yet, so return directly.
	 */
	if (hop_pte_addr[0] == U64_MAX)
		return -EINVAL;

	curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0];

	/* Handle hop1 to hop_last */
	for (i = 1 ; i <= hop_last ; i++) {
		hop_addr[i] = hl_mmu_dr_get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[i]);
		if (hop_addr[i] == ULLONG_MAX) {
			rc = -ENOMEM;
			goto err;
		}

		hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
							hop_addr[i], scrambled_virt_addr);
		if (hop_pte_addr[i] == U64_MAX) {
			rc = -EINVAL;
			goto err;
		}

		if (!hop_pte_addr[i]) {
			rc = -EINVAL;
			goto err;
		}

		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i];
	}

	if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev,
			"mapping already exists for virt_addr 0x%llx\n",
			virt_addr);

		for (i = 0 ; i <= hop_last ; i++)
			dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n",
				i, *(u64 *) (uintptr_t) hop_pte_addr[i],
				hop_pte_addr[i]);

		rc = -EINVAL;
		goto err;
	}

	curr_pte = (scrambled_phys_addr & HOP_PHYS_ADDR_MASK)
			| mmu_prop->last_mask | PAGE_PRESENT_MASK;

	/* Write the PTEs */
	hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[hop_last], curr_pte);

	/* for each new hop, add its address to the table of previous-hop */
	for (i = 1 ; i <= hop_last ; i++) {
		if (hop_new[i]) {
			curr_pte = (hop_addr[i] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
			hl_mmu_dr_write_pte(ctx, hop_pte_addr[i - 1], curr_pte);

			/* hop0 is preallocated and has no PTE reference count */
			if (i - 1)
				hl_mmu_dr_get_pte(ctx, hop_addr[i - 1]);
		}
	}

	/* Account for the final PTE written into the last hop */
	hl_mmu_dr_get_pte(ctx, hop_addr[hop_last]);

	return 0;

err:
	/* Release only the hops that this call allocated */
	for (i = 1 ; i <= hop_last ; i++)
		if (hop_new[i] && (hop_addr[i] != U64_MAX))
			hl_mmu_dr_free_hop(ctx, hop_addr[i]);

	return rc;
}
/*
 * hl_mmu_v2_swap_out - swap-out hook of the v2 MMU
 *
 * @ctx: pointer to the context structure
 *
 * Intentionally empty: this implementation performs no action on swap out.
 */
static void hl_mmu_v2_swap_out(struct hl_ctx *ctx)
{
}
/*
 * hl_mmu_v2_swap_in - swap-in callback of the MMU v2 implementation
 *
 * @ctx: pointer to the context structure
 *
 * The body is empty, so calling this function has no effect on @ctx.
 * It is the function that hl_mmu_v2_set_funcs() stores in the swap_in
 * member of the MMU functions structure.
 */
static void hl_mmu_v2_swap_in(struct hl_ctx *ctx)
{
}
/*
 * hl_mmu_v2_get_tlb_info - collect the per-hop translation data of a virtual address
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virtual address to look up; it is checked against the dmmu
 *             start/end addresses using the dmmu page size
 * @hops: output structure; receives the range type, the scrambled virtual
 *        address, the hop address / PTE address / PTE value of every hop that
 *        was visited, the resulting physical address and the number of hops used
 *
 * Return: 0 on success, -EINVAL when the range check on @virt_addr fails,
 * -EFAULT when a PTE address or the hop0 PTE value equals U64_MAX, when a next
 * hop address equals ULLONG_MAX, when a PTE is not marked present, or when no
 * PTE carrying the last-hop mask is found.
 *
 * Note: @hops may already be partially written when an error is returned.
 */
static int hl_mmu_v2_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_mmu_properties *mmu_prop = &hdev->asic_prop.dmmu;
	int hop;

	/* device resident in V2 are allowed only for HMMU */
	if (!hl_mem_area_inside_range(virt_addr, mmu_prop->page_size,
					mmu_prop->start_addr, mmu_prop->end_addr))
		return -EINVAL;

	hops->range_type = HL_VA_RANGE_TYPE_DRAM;
	hops->scrambled_vaddr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);

	/* hop0: its address is obtained from a dedicated helper */
	hops->hop_info[0].hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
	hops->hop_info[0].hop_pte_addr =
		hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
						hops->hop_info[0].hop_addr,
						hops->scrambled_vaddr);
	if (hops->hop_info[0].hop_pte_addr == U64_MAX)
		return -EFAULT;

	hops->hop_info[0].hop_pte_val =
		hdev->asic_funcs->read_pte(hdev, hops->hop_info[0].hop_pte_addr);
	if (hops->hop_info[0].hop_pte_val == U64_MAX)
		return -EFAULT;

	/* hop1 and onwards: each hop address is derived from the previous PTE value */
	hop = 1;
	while (hop < mmu_prop->num_hops) {
		hops->hop_info[hop].hop_addr =
			hl_mmu_get_next_hop_addr(ctx, hops->hop_info[hop - 1].hop_pte_val);
		if (hops->hop_info[hop].hop_addr == ULLONG_MAX)
			return -EFAULT;

		hops->hop_info[hop].hop_pte_addr =
			hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, hop,
							hops->hop_info[hop].hop_addr,
							hops->scrambled_vaddr);
		if (hops->hop_info[hop].hop_pte_addr == U64_MAX)
			return -EFAULT;

		hops->hop_info[hop].hop_pte_val =
			hdev->asic_funcs->read_pte(hdev, hops->hop_info[hop].hop_pte_addr);
		if (!(hops->hop_info[hop].hop_pte_val & PAGE_PRESENT_MASK))
			return -EFAULT;

		/* stop walking once the PTE is flagged as the last hop */
		if (hops->hop_info[hop].hop_pte_val & mmu_prop->last_mask)
			break;

		hop++;
	}

	/* if passed over all hops then no last hop was found */
	if (hop == mmu_prop->num_hops)
		return -EFAULT;

	/* same present-bit test as inside the loop, repeated on the final hop */
	if (!(hops->hop_info[hop].hop_pte_val & PAGE_PRESENT_MASK))
		return -EFAULT;

	/* descramble the PTE value only when scrambling changed the virtual address */
	if (hops->scrambled_vaddr == virt_addr)
		hops->unscrambled_paddr = hops->hop_info[hop].hop_pte_val;
	else
		hops->unscrambled_paddr =
			hdev->asic_funcs->descramble_addr(hdev,
							hops->hop_info[hop].hop_pte_val);

	hops->used_hops = hop + 1;

	return 0;
}
/*
 * hl_mmu_v2_set_funcs - fill an MMU functions structure with the MMU v2 callbacks
 *
 * @hdev: pointer to the device structure (not referenced by this function)
 * @mmu: pointer to the MMU functions structure to populate
 */
void hl_mmu_v2_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
{
	/* callbacks served by the hl_mmu_dr_* helpers */
	mmu->init = hl_mmu_dr_init;
	mmu->fini = hl_mmu_dr_fini;
	mmu->flush = hl_mmu_dr_flush;

	/* per-context callbacks */
	mmu->ctx_init = hl_mmu_v2_ctx_init;
	mmu->ctx_fini = hl_mmu_v2_ctx_fini;
	mmu->swap_in = hl_mmu_v2_swap_in;
	mmu->swap_out = hl_mmu_v2_swap_out;

	/* mapping and translation-lookup callbacks */
	mmu->map = hl_mmu_v2_map;
	mmu->unmap = hl_mmu_v2_unmap;
	mmu->get_tlb_info = hl_mmu_v2_get_tlb_info;
}
......@@ -47,7 +47,7 @@ static inline int hl_mmu_v2_hr_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
return hl_mmu_hr_init(hdev, &hdev->mmu_priv.hr, prop->mmu_hop_table_size,
return hl_mmu_hr_init(hdev, &hdev->mmu_priv.hr, prop->pmmu.hop_table_size,
prop->mmu_pgt_size);
}
......@@ -65,7 +65,7 @@ static inline void hl_mmu_v2_hr_fini(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
hl_mmu_hr_fini(hdev, &hdev->mmu_priv.hr, prop->mmu_hop_table_size);
hl_mmu_hr_fini(hdev, &hdev->mmu_priv.hr, prop->pmmu.hop_table_size);
}
/**
......@@ -108,7 +108,7 @@ static void hl_mmu_v2_hr_ctx_fini(struct hl_ctx *ctx)
"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
hl_mmu_hr_free_hop_remove_pgt(pgt_info, &ctx->hdev->mmu_priv.hr,
ctx->hdev->asic_prop.mmu_hop_table_size);
ctx->hdev->asic_prop.pmmu.hop_table_size);
}
}
......@@ -150,7 +150,7 @@ static int _hl_mmu_v2_hr_unmap(struct hl_ctx *ctx,
curr_pte = *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
hop_pte_phys_addr[i],
ctx->hdev->asic_prop.mmu_hop_table_size);
ctx->hdev->asic_prop.pmmu.hop_table_size);
if ((i < hop_last) && (curr_pte & mmu_prop->last_mask)) {
hop_last = i;
......@@ -169,14 +169,14 @@ static int _hl_mmu_v2_hr_unmap(struct hl_ctx *ctx,
for (i = hop_last ; i > 0 ; i--) {
hl_mmu_hr_clear_pte(ctx, hops_pgt_info[i], hop_pte_phys_addr[i],
ctx->hdev->asic_prop.mmu_hop_table_size);
ctx->hdev->asic_prop.pmmu.hop_table_size);
if (hl_mmu_hr_put_pte(ctx, hops_pgt_info[i], &ctx->hdev->mmu_priv.hr,
ctx->hdev->asic_prop.mmu_hop_table_size))
ctx->hdev->asic_prop.pmmu.hop_table_size))
goto mapped;
}
hl_mmu_hr_clear_pte(ctx, hops_pgt_info[0], hop_pte_phys_addr[0],
ctx->hdev->asic_prop.mmu_hop_table_size);
ctx->hdev->asic_prop.pmmu.hop_table_size);
mapped:
return 0;
......@@ -255,7 +255,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
scrambled_virt_addr);
curr_pte = *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
hop_pte_phys_addr[i],
ctx->hdev->asic_prop.mmu_hop_table_size);
ctx->hdev->asic_prop.pmmu.hop_table_size);
}
if (curr_pte & PAGE_PRESENT_MASK) {
......@@ -268,7 +268,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
*(u64 *) (uintptr_t)
hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
hop_pte_phys_addr[i],
ctx->hdev->asic_prop.mmu_hop_table_size),
ctx->hdev->asic_prop.pmmu.hop_table_size),
hop_pte_phys_addr[i]);
rc = -EINVAL;
goto err;
......@@ -279,7 +279,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
/* Write the PTEs */
hl_mmu_hr_write_pte(ctx, hops_pgt_info[hop_last], hop_pte_phys_addr[hop_last], curr_pte,
ctx->hdev->asic_prop.mmu_hop_table_size);
ctx->hdev->asic_prop.pmmu.hop_table_size);
/* for each new hop, add its address to the table of previous-hop */
for (i = 1 ; i <= hop_last ; i++) {
......@@ -287,7 +287,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
curr_pte = (hops_pgt_info[i]->phys_addr & HOP_PHYS_ADDR_MASK) |
PAGE_PRESENT_MASK;
hl_mmu_hr_write_pte(ctx, hops_pgt_info[i - 1], hop_pte_phys_addr[i - 1],
curr_pte, ctx->hdev->asic_prop.mmu_hop_table_size);
curr_pte, ctx->hdev->asic_prop.pmmu.hop_table_size);
if (i - 1)
hl_mmu_hr_get_pte(ctx, &ctx->hdev->mmu_func[MMU_HR_PGT].hr_funcs,
hops_pgt_info[i - 1]->phys_addr);
......@@ -303,7 +303,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
for (i = 1 ; i <= hop_last ; i++)
if (hop_new[i] && hops_pgt_info[i])
hl_mmu_hr_free_hop_remove_pgt(hops_pgt_info[i], &ctx->hdev->mmu_priv.hr,
ctx->hdev->asic_prop.mmu_hop_table_size);
ctx->hdev->asic_prop.pmmu.hop_table_size);
return rc;
}
......
......@@ -7,15 +7,31 @@
#include "habanalabs.h"
static const char * const hl_glbl_error_cause[HL_MAX_NUM_OF_GLBL_ERR_CAUSE] = {
static const char * const hl_glbl_error_cause[] = {
"Error due to un-priv read",
"Error due to un-secure read",
"Error due to read from unmapped reg",
"Error due to un-priv write",
"Error due to un-secure write",
"Error due to write to unmapped reg",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"External I/F write sec violation",
"External I/F write to un-mapped reg",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"Read to write only",
"Write to read only"
};
......@@ -671,10 +687,11 @@ static bool hl_check_block_range_exclusion(struct hl_device *hdev,
static int hl_read_glbl_errors(struct hl_device *hdev,
u32 blk_idx, u32 major, u32 minor, u32 sub_minor, void *data)
{
struct hl_special_block_info *special_blocks = hdev->asic_prop.special_blocks;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_special_block_info *special_blocks = prop->special_blocks;
struct hl_special_block_info *current_block = &special_blocks[blk_idx];
u32 glbl_err_addr, glbl_err_cause, addr_val, cause_val, block_base,
base = current_block->base_addr - lower_32_bits(hdev->asic_prop.cfg_base_address);
base = current_block->base_addr - lower_32_bits(prop->cfg_base_address);
int i;
block_base = base + major * current_block->major_offset +
......@@ -689,13 +706,13 @@ static int hl_read_glbl_errors(struct hl_device *hdev,
glbl_err_addr = block_base + HL_GLBL_ERR_ADDR_OFFSET;
addr_val = RREG32(glbl_err_addr);
for (i = 0 ; i < hdev->asic_prop.glbl_err_cause_num ; i++) {
for (i = 0 ; i <= prop->glbl_err_max_cause_num ; i++) {
if (cause_val & BIT(i))
dev_err_ratelimited(hdev->dev,
"%s, addr %#llx\n",
hl_glbl_error_cause[i],
hdev->asic_prop.cfg_base_address + block_base +
FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val));
"%s, addr %#llx\n",
hl_glbl_error_cause[i],
prop->cfg_base_address + block_base +
FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val));
}
WREG32(glbl_err_cause, cause_val);
......
......@@ -13,8 +13,7 @@
struct hl_device;
/* special blocks */
#define HL_MAX_NUM_OF_GLBL_ERR_CAUSE 10
#define HL_GLBL_ERR_ADDRESS_MASK GENMASK(11, 0)
#define HL_GLBL_ERR_ADDRESS_MASK GENMASK(11, 0)
/* GLBL_ERR_ADDR register offset from the start of the block */
#define HL_GLBL_ERR_ADDR_OFFSET 0xF44
/* GLBL_ERR_CAUSE register offset from the start of the block */
......
......@@ -614,8 +614,6 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
else
prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
prop->mmu_pte_size = HL_PTE_SIZE;
prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
prop->device_mem_alloc_default_page_size = prop->dram_page_size;
prop->dram_supports_virtual_memory = false;
......@@ -637,8 +635,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
prop->pmmu.last_mask = LAST_MASK;
/* TODO: will be duplicated until implementing per-MMU props */
prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
/* PMMU and HPMMU are the same except of page size */
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
......@@ -649,6 +647,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
prop->dmmu.end_addr = VA_HOST_SPACE_END;
prop->dmmu.page_size = PAGE_SIZE_2MB;
prop->dmmu.pgt_size = prop->mmu_pgt_size;
prop->cfg_size = CFG_SIZE;
prop->max_asid = MAX_ASID;
......@@ -3652,7 +3651,7 @@ static int gaudi_mmu_init(struct hl_device *hdev)
for (i = 0 ; i < prop->max_asid ; i++) {
hop0_addr = prop->mmu_pgt_addr +
(i * prop->mmu_hop_table_size);
(i * prop->dmmu.hop_table_size);
rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
if (rc) {
......
This diff is collapsed.
......@@ -19,8 +19,6 @@
#define GAUDI2_LINUX_FW_FILE "habanalabs/gaudi2/gaudi2-fit.itb"
#define GAUDI2_BOOT_FIT_FILE "habanalabs/gaudi2/gaudi2-boot-fit.itb"
#define MMU_PAGE_TABLES_INITIAL_SIZE 0x10000000 /* 256MB */
#define GAUDI2_CPU_TIMEOUT_USEC 30000000 /* 30s */
#define NUMBER_OF_PDMA_QUEUES 2
......@@ -109,13 +107,11 @@
/* DRAM Memory Map */
#define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */
/* This define should be used only when working in a debug mode without dram.
* When working with dram, the driver size will be calculated dynamically.
*/
#define NIC_DEFAULT_DRV_SIZE 0x20000000 /* 512MB */
#define CPU_FW_IMAGE_ADDR DRAM_PHYS_BASE
#define PMMU_PAGE_TABLES_SIZE 0x10000000 /* 256MB */
#define EDMA_PQS_SIZE SZ_2M
#define EDMA_SCRATCHPAD_SIZE SZ_1M
#define HMMU_PAGE_TABLES_SIZE SZ_1M
#define NIC_NUMBER_OF_PORTS NIC_NUMBER_OF_ENGINES
......@@ -241,9 +237,8 @@
#define GAUDI2_SOB_INCREMENT_BY_ONE (FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1) | \
FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1))
#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0)
#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0)
#define GAUDI2_NUM_OF_GLBL_ERR_CAUSE 8
enum gaudi2_reserved_sob_id {
GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST,
......
......@@ -413,8 +413,6 @@ int goya_set_fixed_properties(struct hl_device *hdev)
else
prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
prop->mmu_pte_size = HL_PTE_SIZE;
prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
prop->device_mem_alloc_default_page_size = prop->dram_page_size;
prop->dram_supports_virtual_memory = true;
......@@ -435,8 +433,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
prop->dmmu.last_mask = LAST_MASK;
/* TODO: will be duplicated until implementing per-MMU props */
prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
prop->dmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->dmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
/* shifts and masks are the same in PMMU and DMMU */
memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
......@@ -446,8 +444,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
prop->pmmu.last_mask = LAST_MASK;
/* TODO: will be duplicated until implementing per-MMU props */
prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
/* PMMU and HPMMU are the same except of page size */
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
......@@ -2678,7 +2676,7 @@ int goya_mmu_init(struct hl_device *hdev)
for (i = 0 ; i < prop->max_asid ; i++) {
hop0_addr = prop->mmu_pgt_addr +
(i * prop->mmu_hop_table_size);
(i * prop->dmmu.hop_table_size);
rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
if (rc) {
......
......@@ -576,7 +576,6 @@ static int goya_config_spmu(struct hl_device *hdev,
struct hl_debug_params *params)
{
u64 base_reg;
struct hl_debug_params_spmu *input = params->input;
u64 *output;
u32 output_arr_len;
u32 events_num;
......@@ -592,7 +591,7 @@ static int goya_config_spmu(struct hl_device *hdev,
base_reg = debug_spmu_regs[params->reg_idx] - CFG_BASE;
if (params->enable) {
input = params->input;
struct hl_debug_params_spmu *input = params->input;
if (!input)
return -EINVAL;
......
......@@ -26,6 +26,8 @@
#define LAST_MASK 0x0000000000800ull
#define FLAGS_MASK 0x0000000000FFFull
#define MMU_ARCH_3_HOPS 3
#define MMU_ARCH_4_HOPS 4
#define MMU_ARCH_5_HOPS 5
#define MMU_ARCH_6_HOPS 6
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment