Commit e4cdccd2 authored by farah kassabri's avatar farah kassabri Committed by Oded Gabbay

habanalabs: add support for encapsulated signals submission

This commit is the second part of the encapsulated signals feature.
It contains the driver support for submission of cs with encapsulated
signals and the wait for them.
Signed-off-by: default avatarfarah kassabri <fkassabri@habana.ai>
Reviewed-by: default avatarOded Gabbay <ogabbay@kernel.org>
Signed-off-by: default avatarOded Gabbay <ogabbay@kernel.org>
parent dadf17ab
......@@ -16,7 +16,10 @@ void hl_encaps_handle_do_release(struct kref *ref)
struct hl_ctx *ctx = handle->hdev->compute_ctx;
struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr;
spin_lock(&mgr->lock);
idr_remove(&mgr->handles, handle->id);
spin_unlock(&mgr->lock);
kfree(handle);
}
......@@ -33,7 +36,10 @@ static void hl_encaps_handle_do_release_sob(struct kref *ref)
*/
hw_sob_put(handle->hw_sob);
spin_lock(&mgr->lock);
idr_remove(&mgr->handles, handle->id);
spin_unlock(&mgr->lock);
kfree(handle);
}
......@@ -67,11 +73,6 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
struct hl_device *hdev = ctx->hdev;
int i;
/* Release all allocated pending cb's, those cb's were never
* scheduled so it is safe to release them here
*/
hl_pending_cb_list_flush(ctx);
/* Release all allocated HW block mapped list entries and destroy
* the mutex.
*/
......@@ -198,11 +199,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
kref_init(&ctx->refcount);
ctx->cs_sequence = 1;
INIT_LIST_HEAD(&ctx->pending_cb_list);
spin_lock_init(&ctx->pending_cb_lock);
spin_lock_init(&ctx->cs_lock);
atomic_set(&ctx->thread_ctx_switch_token, 1);
atomic_set(&ctx->thread_pending_cb_token, 1);
ctx->thread_ctx_switch_wait_token = 0;
ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
sizeof(struct hl_fence *),
......
......@@ -605,11 +605,11 @@ struct hl_fence {
/**
* struct hl_cs_compl - command submission completion object.
* @sob_reset_work: workqueue object to run SOB reset flow.
* @base_fence: hl fence object.
* @lock: spinlock to protect fence.
* @hdev: habanalabs device structure.
* @hw_sob: the H/W SOB used in this signal/wait CS.
* @encaps_sig_hdl: encaps signals hanlder.
* @cs_seq: command submission sequence number.
* @type: type of the CS - signal/wait.
* @sob_val: the SOB value that is used in this signal/wait CS.
......@@ -618,11 +618,11 @@ struct hl_fence {
* encaps signals or not.
*/
struct hl_cs_compl {
struct work_struct sob_reset_work;
struct hl_fence base_fence;
spinlock_t lock;
struct hl_device *hdev;
struct hl_hw_sob *hw_sob;
struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
u64 cs_seq;
enum hl_cs_type type;
u16 sob_val;
......@@ -1267,8 +1267,9 @@ struct hl_asic_funcs {
u64 (*get_device_time)(struct hl_device *hdev);
int (*collective_wait_init_cs)(struct hl_cs *cs);
int (*collective_wait_create_jobs)(struct hl_device *hdev,
struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
u32 collective_engine_id);
struct hl_ctx *ctx, struct hl_cs *cs,
u32 wait_queue_id, u32 collective_engine_id,
u32 encaps_signal_offset);
u64 (*scramble_addr)(struct hl_device *hdev, u64 addr);
u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
void (*ack_protection_bits_errors)(struct hl_device *hdev);
......@@ -1339,20 +1340,6 @@ struct hl_cs_counters_atomic {
atomic64_t validation_drop_cnt;
};
/**
* struct hl_pending_cb - pending command buffer structure
* @cb_node: cb node in pending cb list
* @cb: command buffer to send in next submission
* @cb_size: command buffer size
* @hw_queue_id: destination queue id
*/
struct hl_pending_cb {
struct list_head cb_node;
struct hl_cb *cb;
u32 cb_size;
u32 hw_queue_id;
};
/**
* struct hl_ctx - user/kernel context.
* @mem_hash: holds mapping from virtual address to virtual memory area
......@@ -1369,8 +1356,6 @@ struct hl_pending_cb {
* MMU hash or walking the PGT requires talking this lock.
* @hw_block_list_lock: protects the HW block memory list.
* @debugfs_list: node in debugfs list of contexts.
* pending_cb_list: list of pending command buffers waiting to be sent upon
* next user command submission context.
* @hw_block_mem_list: list of HW block virtual mapped addresses.
* @cs_counters: context command submission counters.
* @cb_va_pool: device VA pool for command buffers which are mapped to the
......@@ -1381,17 +1366,11 @@ struct hl_pending_cb {
* index to cs_pending array.
* @dram_default_hops: array that holds all hops addresses needed for default
* DRAM mapping.
* @pending_cb_lock: spinlock to protect pending cb list
* @cs_lock: spinlock to protect cs_sequence.
* @dram_phys_mem: amount of used physical DRAM memory by this context.
* @thread_ctx_switch_token: token to prevent multiple threads of the same
* context from running the context switch phase.
* Only a single thread should run it.
* @thread_pending_cb_token: token to prevent multiple threads from processing
* the pending CB list. Only a single thread should
* process the list since it is protected by a
* spinlock and we don't want to halt the entire
* command submission sequence.
* @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
* the context switch phase from moving to their
* execution phase before the context switch phase
......@@ -1411,18 +1390,15 @@ struct hl_ctx {
struct mutex mmu_lock;
struct mutex hw_block_list_lock;
struct list_head debugfs_list;
struct list_head pending_cb_list;
struct list_head hw_block_mem_list;
struct hl_cs_counters_atomic cs_counters;
struct gen_pool *cb_va_pool;
struct hl_encaps_signals_mgr sig_mgr;
u64 cs_sequence;
u64 *dram_default_hops;
spinlock_t pending_cb_lock;
spinlock_t cs_lock;
atomic64_t dram_phys_mem;
atomic_t thread_ctx_switch_token;
atomic_t thread_pending_cb_token;
u32 thread_ctx_switch_wait_token;
u32 asid;
u32 handle;
......@@ -1485,12 +1461,14 @@ struct hl_userptr {
* @mirror_node : node in device mirror list of command submissions.
* @staged_cs_node: node in the staged cs list.
* @debugfs_list: node in debugfs list of command submissions.
* @encaps_sig_hdl: holds the encaps signals handle.
* @sequence: the sequence number of this CS.
* @staged_sequence: the sequence of the staged submission this CS is part of,
* relevant only if staged_cs is set.
* @timeout_jiffies: cs timeout in jiffies.
* @submission_time_jiffies: submission time of the cs
* @type: CS_TYPE_*.
* @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs.
* @submitted: true if CS was submitted to H/W.
* @completed: true if CS was completed by device.
* @timedout : true if CS was timedout.
......@@ -1504,6 +1482,7 @@ struct hl_userptr {
* @staged_cs: true if this CS is part of a staged submission.
* @skip_reset_on_timeout: true if we shall not reset the device in case
* timeout occurs (debug scenario).
* @encaps_signals: true if this CS has encaps reserved signals.
*/
struct hl_cs {
u16 *jobs_in_queue_cnt;
......@@ -1518,11 +1497,13 @@ struct hl_cs {
struct list_head mirror_node;
struct list_head staged_cs_node;
struct list_head debugfs_list;
struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
u64 sequence;
u64 staged_sequence;
u64 timeout_jiffies;
u64 submission_time_jiffies;
enum hl_cs_type type;
u32 encaps_sig_hdl_id;
u8 submitted;
u8 completed;
u8 timedout;
......@@ -1533,6 +1514,7 @@ struct hl_cs {
u8 staged_first;
u8 staged_cs;
u8 skip_reset_on_timeout;
u8 encaps_signals;
};
/**
......@@ -1552,6 +1534,8 @@ struct hl_cs {
* @hw_queue_id: the id of the H/W queue this job is submitted to.
* @user_cb_size: the actual size of the CB we got from the user.
* @job_cb_size: the actual size of the CB that we put on the queue.
* @encaps_sig_wait_offset: encapsulated signals offset, which allow user
* to wait on part of the reserved signals.
* @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
* handle to a kernel-allocated CB object, false
* otherwise (SRAM/DRAM/host address).
......@@ -1576,6 +1560,7 @@ struct hl_cs_job {
u32 hw_queue_id;
u32 user_cb_size;
u32 job_cb_size;
u32 encaps_sig_wait_offset;
u8 is_kernel_allocated_cb;
u8 contains_dma_pkt;
};
......@@ -2794,7 +2779,6 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx);
void hl_cb_va_pool_fini(struct hl_ctx *ctx);
void hl_cs_rollback_all(struct hl_device *hdev);
void hl_pending_cb_list_flush(struct hl_ctx *ctx);
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
void hl_sob_reset_error(struct kref *ref);
......@@ -2935,9 +2919,12 @@ int hl_set_voltage(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
void hl_encaps_handle_do_release(struct kref *ref);
void hw_sob_get(struct hl_hw_sob *hw_sob);
void hw_sob_put(struct hl_hw_sob *hw_sob);
void hl_encaps_handle_do_release(struct kref *ref);
void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
struct hl_cs *cs, struct hl_cs_job *job,
struct hl_cs_compl *cs_cmpl);
void hl_release_pending_user_interrupts(struct hl_device *hdev);
int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig);
......
......@@ -416,7 +416,7 @@ static int init_signal_cs(struct hl_device *hdev,
cs_cmpl->sob_val = prop->next_sob_val;
dev_dbg(hdev->dev,
"generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d, seq: %llu\n",
"generate signal CB, sob_id: %d, sob val: %u, q_idx: %d, seq: %llu\n",
cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx,
cs_cmpl->cs_seq);
......@@ -432,12 +432,31 @@ static int init_signal_cs(struct hl_device *hdev,
return rc;
}
void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
struct hl_cs *cs, struct hl_cs_job *job,
struct hl_cs_compl *cs_cmpl)
{
struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
cs_cmpl->hw_sob = handle->hw_sob;
/* Note that encaps_sig_wait_offset was validated earlier in the flow
* for offset value which exceeds the max reserved signal count.
* always decrement 1 of the offset since when the user
* set offset 1 for example he mean to wait only for the first
* signal only, which will be pre_sob_val, and if he set offset 2
* then the value required is (pre_sob_val + 1) and so on...
*/
cs_cmpl->sob_val = handle->pre_sob_val +
(job->encaps_sig_wait_offset - 1);
}
static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
struct hl_cs_compl *signal_cs_cmpl;
struct hl_sync_stream_properties *prop;
struct hl_gen_wait_properties wait_prop;
struct hl_sync_stream_properties *prop;
struct hl_cs_compl *signal_cs_cmpl;
u32 q_idx;
q_idx = job->hw_queue_id;
......@@ -447,9 +466,23 @@ static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
struct hl_cs_compl,
base_fence);
/* copy the SOB id and value of the signal CS */
if (cs->encaps_signals) {
/* use the encaps signal handle stored earlier in the flow
* and set the SOB information from the encaps
* signals handle
*/
hl_hw_queue_encaps_sig_set_sob_info(hdev, cs, job, cs_cmpl);
dev_dbg(hdev->dev, "Wait for encaps signals handle, qidx(%u), CS sequence(%llu), sob val: 0x%x, offset: %u\n",
cs->encaps_sig_hdl->q_idx,
cs->encaps_sig_hdl->cs_seq,
cs_cmpl->sob_val,
job->encaps_sig_wait_offset);
} else {
/* Copy the SOB id and value of the signal CS */
cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
}
/* check again if the signal cs already completed.
* if yes then don't send any wait cs since the hw_sob
......@@ -523,6 +556,59 @@ static int init_signal_wait_cs(struct hl_cs *cs)
return rc;
}
static int encaps_sig_first_staged_cs_handler
(struct hl_device *hdev, struct hl_cs *cs)
{
struct hl_cs_compl *cs_cmpl =
container_of(cs->fence,
struct hl_cs_compl, base_fence);
struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
struct hl_encaps_signals_mgr *mgr;
int rc = 0;
mgr = &hdev->compute_ctx->sig_mgr;
spin_lock(&mgr->lock);
encaps_sig_hdl = idr_find(&mgr->handles, cs->encaps_sig_hdl_id);
if (encaps_sig_hdl) {
/*
* Set handler CS sequence,
* the CS which contains the encapsulated signals.
*/
encaps_sig_hdl->cs_seq = cs->sequence;
/* store the handle and set encaps signal indication,
* to be used later in cs_do_release to put the last
* reference to encaps signals handlers.
*/
cs_cmpl->encaps_signals = true;
cs_cmpl->encaps_sig_hdl = encaps_sig_hdl;
/* set hw_sob pointer in completion object
* since it's used in cs_do_release flow to put
* refcount to sob
*/
cs_cmpl->hw_sob = encaps_sig_hdl->hw_sob;
cs_cmpl->sob_val = encaps_sig_hdl->pre_sob_val +
encaps_sig_hdl->count;
dev_dbg(hdev->dev, "CS seq (%llu) added to encaps signal handler id (%u), count(%u), qidx(%u), sob(%u), val(%u)\n",
cs->sequence, encaps_sig_hdl->id,
encaps_sig_hdl->count,
encaps_sig_hdl->q_idx,
cs_cmpl->hw_sob->sob_id,
cs_cmpl->sob_val);
} else {
dev_err(hdev->dev, "encaps handle id(%u) wasn't found!\n",
cs->encaps_sig_hdl_id);
rc = -EINVAL;
}
spin_unlock(&mgr->lock);
return rc;
}
/*
* hl_hw_queue_schedule_cs - schedule a command submission
* @cs: pointer to the CS
......@@ -602,6 +688,12 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
}
if (cs->encaps_signals && cs->staged_first) {
rc = encaps_sig_first_staged_cs_handler(hdev, cs);
if (rc)
goto unroll_cq_resv;
}
spin_lock(&hdev->cs_mirror_lock);
/* Verify staged CS exists and add to the staged list */
......
......@@ -456,8 +456,6 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
u32 num_regs, u32 val);
static int gaudi_schedule_register_memset(struct hl_device *hdev,
u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
......@@ -468,7 +466,6 @@ static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
struct hl_gen_wait_properties *prop);
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
......@@ -1068,17 +1065,11 @@ static void gaudi_sob_group_hw_reset(struct kref *ref)
struct gaudi_hw_sob_group *hw_sob_group =
container_of(ref, struct gaudi_hw_sob_group, kref);
struct hl_device *hdev = hw_sob_group->hdev;
u64 base_addr;
int rc;
int i;
base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
hw_sob_group->base_sob_id * 4;
rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
if (rc)
dev_err(hdev->dev,
"failed resetting sob group - sob base %u, count %u",
hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
kref_init(&hw_sob_group->kref);
}
......@@ -1215,6 +1206,20 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev,
queue_id = job->hw_queue_id;
prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
if (job->cs->encaps_signals) {
/* use the encaps signal handle store earlier in the flow
* and set the SOB information from the encaps
* signals handle
*/
hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
cs_cmpl);
dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
job->cs->sequence,
cs_cmpl->hw_sob->sob_id,
cs_cmpl->sob_val);
}
/* Add to wait CBs using slave monitor */
wait_prop.data = (void *) job->user_cb;
wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
......@@ -1225,7 +1230,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev,
wait_prop.size = cb_size;
dev_dbg(hdev->dev,
"Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
prop->collective_slave_mon_id, queue_id);
......@@ -1257,9 +1262,14 @@ static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
gaudi = hdev->asic_specific;
cprop = &gaudi->collective_props;
/* In encaps signals case the SOB info will be retrieved from
* the handle in gaudi_collective_slave_init_job.
*/
if (!cs->encaps_signals) {
/* copy the SOB id and value of the signal CS */
cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
}
/* check again if the signal cs already completed.
* if yes then don't send any wait cs since the hw_sob
......@@ -1336,7 +1346,8 @@ static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
struct hl_ctx *ctx, struct hl_cs *cs,
enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
u32 encaps_signal_offset)
{
struct hw_queue_properties *hw_queue_prop;
struct hl_cs_counters_atomic *cntr;
......@@ -1396,6 +1407,13 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
job->user_cb_size = cb_size;
job->hw_queue_id = queue_id;
/* since its guaranteed to have only one chunk in the collective wait
* cs, we can use this chunk to set the encapsulated signal offset
* in the jobs.
*/
if (cs->encaps_signals)
job->encaps_sig_wait_offset = encaps_signal_offset;
/*
* No need in parsing, user CB is the patched CB.
* We call hl_cb_destroy() out of two reasons - we don't need
......@@ -1424,8 +1442,9 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
}
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
u32 collective_engine_id)
struct hl_ctx *ctx, struct hl_cs *cs,
u32 wait_queue_id, u32 collective_engine_id,
u32 encaps_signal_offset)
{
struct gaudi_device *gaudi = hdev->asic_specific;
struct hw_queue_properties *hw_queue_prop;
......@@ -1475,7 +1494,8 @@ static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
if (i == 0) {
queue_id = wait_queue_id;
rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
HL_COLLECTIVE_MASTER, queue_id,
wait_queue_id, encaps_signal_offset);
} else {
if (nic_idx < NIC_NUMBER_OF_ENGINES) {
if (gaudi->hw_cap_initialized &
......@@ -1495,7 +1515,8 @@ static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
}
rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
HL_COLLECTIVE_SLAVE, queue_id,
wait_queue_id, encaps_signal_offset);
}
if (rc)
......@@ -5909,78 +5930,6 @@ static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
return rc;
}
static int gaudi_schedule_register_memset(struct hl_device *hdev,
u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
{
struct hl_ctx *ctx;
struct hl_pending_cb *pending_cb;
struct packet_msg_long *pkt;
u32 cb_size, ctl;
struct hl_cb *cb;
int i, rc;
mutex_lock(&hdev->fpriv_list_lock);
ctx = hdev->compute_ctx;
/* If no compute context available or context is going down
* memset registers directly
*/
if (!ctx || kref_read(&ctx->refcount) == 0) {
rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
mutex_unlock(&hdev->fpriv_list_lock);
return rc;
}
mutex_unlock(&hdev->fpriv_list_lock);
cb_size = (sizeof(*pkt) * num_regs) +
sizeof(struct packet_msg_prot) * 2;
if (cb_size > SZ_2M) {
dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
return -ENOMEM;
}
pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
if (!pending_cb)
return -ENOMEM;
cb = hl_cb_kernel_create(hdev, cb_size, false);
if (!cb) {
kfree(pending_cb);
return -EFAULT;
}
pkt = cb->kernel_address;
ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
for (i = 0; i < num_regs ; i++, pkt++) {
pkt->ctl = cpu_to_le32(ctl);
pkt->value = cpu_to_le32(val);
pkt->addr = cpu_to_le64(reg_base + (i * 4));
}
hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
pending_cb->cb = cb;
pending_cb->cb_size = cb_size;
/* The queue ID MUST be an external queue ID. Otherwise, we will
* have undefined behavior
*/
pending_cb->hw_queue_id = hw_queue_id;
spin_lock(&ctx->pending_cb_lock);
list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
spin_unlock(&ctx->pending_cb_lock);
return 0;
}
static int gaudi_restore_sm_registers(struct hl_device *hdev)
{
u64 base_addr;
......@@ -9031,16 +8980,12 @@ static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
int rc;
dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
hw_sob->sob_id);
rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
hw_sob->sob_id * 4, 1, 0);
if (rc)
dev_err(hdev->dev, "failed resetting sob %u", hw_sob->sob_id);
WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
hw_sob->sob_id * 4, 0);
kref_init(&hw_sob->kref);
}
......
......@@ -5487,7 +5487,7 @@ static int goya_collective_wait_init_cs(struct hl_cs *cs)
static int goya_collective_wait_create_jobs(struct hl_device *hdev,
struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
u32 collective_engine_id)
u32 collective_engine_id, u32 encaps_signal_offset)
{
return -EINVAL;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment