Commit e902585f authored by Mikko Perttunen, committed by Thierry Reding

gpu: host1x: Add support for syncpoint waits in CDMA pushbuffer

Add support for inserting syncpoint waits in the CDMA pushbuffer.
These waits need to be done in the HOST1X class, while gathers submitted
by the application execute in the engine class.

Support is added by converting the job's gather list into a command
list that can include both gathers and waits. When the job is
submitted, these commands are pushed as the appropriate opcodes
on the CDMA pushbuffer.

Also supported are waits relative to the start of the job,
which are useful for jobs doing multiple things with an engine
that doesn't natively support pipelining.

While at it, use 32-bit waits on chips that support them.
Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
parent 17a298e9
...@@ -47,39 +47,84 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo, ...@@ -47,39 +47,84 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
} }
} }
static void submit_gathers(struct host1x_job *job) static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold,
u32 next_class)
{
/*
 * Push a wait on syncpoint `id` for `threshold` onto the CDMA pushbuffer
 * of `cdma`, then switch the channel to `next_class`.
 */
#if HOST1X_HW >= 2
/*
 * One wide (four-word) push: a setclass to the HOST1X class based at
 * LOAD_SYNCPT_PAYLOAD_32 with mask bits 0 and 2 set, followed by the
 * threshold and the syncpoint id, and finally a setclass to next_class.
 */
host1x_cdma_push_wide(cdma,
host1x_opcode_setclass(
HOST1X_CLASS_HOST1X,
HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32,
/* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */
BIT(0) | BIT(2)
),
threshold,
id,
host1x_opcode_setclass(next_class, 0, 0)
);
#else
/* TODO add waitchk or use waitbases or other mitigation */
/*
 * HOST1X_HW < 2: first push selects the HOST1X class at the wait_syncpt
 * register (mask bit 0) and supplies the value built from id and
 * threshold by host1x_class_host_wait_syncpt().
 */
host1x_cdma_push(cdma,
host1x_opcode_setclass(
HOST1X_CLASS_HOST1X,
host1x_uclass_wait_syncpt_r(),
BIT(0)
),
host1x_class_host_wait_syncpt(id, threshold)
);
/* Second push switches to next_class; the NOP fills the second word. */
host1x_cdma_push(cdma,
host1x_opcode_setclass(next_class, 0, 0),
HOST1X_OPCODE_NOP
);
#endif
}
static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base)
{ {
struct host1x_cdma *cdma = &job->channel->cdma; struct host1x_cdma *cdma = &job->channel->cdma;
#if HOST1X_HW < 6 #if HOST1X_HW < 6
struct device *dev = job->channel->dev; struct device *dev = job->channel->dev;
#endif #endif
unsigned int i; unsigned int i;
u32 threshold;
for (i = 0; i < job->num_gathers; i++) { for (i = 0; i < job->num_cmds; i++) {
struct host1x_job_gather *g = &job->gathers[i]; struct host1x_job_cmd *cmd = &job->cmds[i];
dma_addr_t addr = g->base + g->offset;
u32 op2, op3;
op2 = lower_32_bits(addr); if (cmd->is_wait) {
op3 = upper_32_bits(addr); if (cmd->wait.relative)
threshold = job_syncpt_base + cmd->wait.threshold;
else
threshold = cmd->wait.threshold;
trace_write_gather(cdma, g->bo, g->offset, g->words); submit_wait(cdma, cmd->wait.id, threshold, cmd->wait.next_class);
} else {
struct host1x_job_gather *g = &cmd->gather;
dma_addr_t addr = g->base + g->offset;
u32 op2, op3;
op2 = lower_32_bits(addr);
op3 = upper_32_bits(addr);
if (op3 != 0) { trace_write_gather(cdma, g->bo, g->offset, g->words);
if (op3 != 0) {
#if HOST1X_HW >= 6 #if HOST1X_HW >= 6
u32 op1 = host1x_opcode_gather_wide(g->words); u32 op1 = host1x_opcode_gather_wide(g->words);
u32 op4 = HOST1X_OPCODE_NOP; u32 op4 = HOST1X_OPCODE_NOP;
host1x_cdma_push_wide(cdma, op1, op2, op3, op4); host1x_cdma_push_wide(cdma, op1, op2, op3, op4);
#else #else
dev_err(dev, "invalid gather for push buffer %pad\n", dev_err(dev, "invalid gather for push buffer %pad\n",
&addr); &addr);
continue; continue;
#endif #endif
} else { } else {
u32 op1 = host1x_opcode_gather(g->words); u32 op1 = host1x_opcode_gather(g->words);
host1x_cdma_push(cdma, op1, op2); host1x_cdma_push(cdma, op1, op2);
}
} }
} }
} }
...@@ -126,7 +171,7 @@ static int channel_submit(struct host1x_job *job) ...@@ -126,7 +171,7 @@ static int channel_submit(struct host1x_job *job)
struct host1x *host = dev_get_drvdata(ch->dev->parent); struct host1x *host = dev_get_drvdata(ch->dev->parent);
trace_host1x_channel_submit(dev_name(ch->dev), trace_host1x_channel_submit(dev_name(ch->dev),
job->num_gathers, job->num_relocs, job->num_cmds, job->num_relocs,
job->syncpt->id, job->syncpt_incrs); job->syncpt->id, job->syncpt_incrs);
/* before error checks, return current max */ /* before error checks, return current max */
...@@ -181,7 +226,7 @@ static int channel_submit(struct host1x_job *job) ...@@ -181,7 +226,7 @@ static int channel_submit(struct host1x_job *job)
host1x_opcode_setclass(job->class, 0, 0), host1x_opcode_setclass(job->class, 0, 0),
HOST1X_OPCODE_NOP); HOST1X_OPCODE_NOP);
submit_gathers(job); submit_gathers(job, syncval - user_syncpt_incrs);
/* end CDMA submit & stash pinned hMems into sync queue */ /* end CDMA submit & stash pinned hMems into sync queue */
host1x_cdma_end(&ch->cdma, job); host1x_cdma_end(&ch->cdma, job);
......
...@@ -208,10 +208,15 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma) ...@@ -208,10 +208,15 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
job->first_get, job->timeout, job->first_get, job->timeout,
job->num_slots, job->num_unpins); job->num_slots, job->num_unpins);
for (i = 0; i < job->num_gathers; i++) { for (i = 0; i < job->num_cmds; i++) {
struct host1x_job_gather *g = &job->gathers[i]; struct host1x_job_gather *g;
u32 *mapped; u32 *mapped;
if (job->cmds[i].is_wait)
continue;
g = &job->cmds[i].gather;
if (job->gather_copy_mapped) if (job->gather_copy_mapped)
mapped = (u32 *)job->gather_copy_mapped; mapped = (u32 *)job->gather_copy_mapped;
else else
......
...@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) ...@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
} }
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
host1x_uclass_indoff_indroffset_f(v) host1x_uclass_indoff_indroffset_f(v)
/* Returns 0x4e, the value exposed as HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32. */
static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
{
return 0x4e;
}
/* Macro alias for host1x_uclass_load_syncpt_payload_32_r(). */
#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
host1x_uclass_load_syncpt_payload_32_r()
/*
 * Returns 0x50, the value exposed as HOST1X_UCLASS_WAIT_SYNCPT_32; this is
 * LOAD_SYNCPT_PAYLOAD_32 (0x4e) plus 2.
 */
static inline u32 host1x_uclass_wait_syncpt_32_r(void)
{
return 0x50;
}
/* Macro alias for host1x_uclass_wait_syncpt_32_r(). */
#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
host1x_uclass_wait_syncpt_32_r()
#endif #endif
...@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) ...@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
} }
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
host1x_uclass_indoff_indroffset_f(v) host1x_uclass_indoff_indroffset_f(v)
/* Returns 0x4e, the value exposed as HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32. */
static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
{
return 0x4e;
}
/* Macro alias for host1x_uclass_load_syncpt_payload_32_r(). */
#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
host1x_uclass_load_syncpt_payload_32_r()
/*
 * Returns 0x50, the value exposed as HOST1X_UCLASS_WAIT_SYNCPT_32; this is
 * LOAD_SYNCPT_PAYLOAD_32 (0x4e) plus 2.
 */
static inline u32 host1x_uclass_wait_syncpt_32_r(void)
{
return 0x50;
}
/* Macro alias for host1x_uclass_wait_syncpt_32_r(). */
#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
host1x_uclass_wait_syncpt_32_r()
#endif #endif
...@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) ...@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
} }
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
host1x_uclass_indoff_indroffset_f(v) host1x_uclass_indoff_indroffset_f(v)
/* Returns 0x4e, the value exposed as HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32. */
static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
{
return 0x4e;
}
/* Macro alias for host1x_uclass_load_syncpt_payload_32_r(). */
#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
host1x_uclass_load_syncpt_payload_32_r()
/*
 * Returns 0x50, the value exposed as HOST1X_UCLASS_WAIT_SYNCPT_32; this is
 * LOAD_SYNCPT_PAYLOAD_32 (0x4e) plus 2.
 */
static inline u32 host1x_uclass_wait_syncpt_32_r(void)
{
return 0x50;
}
/* Macro alias for host1x_uclass_wait_syncpt_32_r(). */
#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
host1x_uclass_wait_syncpt_32_r()
#endif #endif
...@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) ...@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
} }
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
host1x_uclass_indoff_indroffset_f(v) host1x_uclass_indoff_indroffset_f(v)
/* Returns 0x4e, the value exposed as HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32. */
static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
{
return 0x4e;
}
/* Macro alias for host1x_uclass_load_syncpt_payload_32_r(). */
#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
host1x_uclass_load_syncpt_payload_32_r()
/*
 * Returns 0x50, the value exposed as HOST1X_UCLASS_WAIT_SYNCPT_32; this is
 * LOAD_SYNCPT_PAYLOAD_32 (0x4e) plus 2.
 */
static inline u32 host1x_uclass_wait_syncpt_32_r(void)
{
return 0x50;
}
/* Macro alias for host1x_uclass_wait_syncpt_32_r(). */
#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
host1x_uclass_wait_syncpt_32_r()
#endif #endif
...@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) ...@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
} }
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
host1x_uclass_indoff_indroffset_f(v) host1x_uclass_indoff_indroffset_f(v)
/* Returns 0x4e, the value exposed as HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32. */
static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
{
return 0x4e;
}
/* Macro alias for host1x_uclass_load_syncpt_payload_32_r(). */
#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
host1x_uclass_load_syncpt_payload_32_r()
/*
 * Returns 0x50, the value exposed as HOST1X_UCLASS_WAIT_SYNCPT_32; this is
 * LOAD_SYNCPT_PAYLOAD_32 (0x4e) plus 2.
 */
static inline u32 host1x_uclass_wait_syncpt_32_r(void)
{
return 0x50;
}
/* Macro alias for host1x_uclass_wait_syncpt_32_r(). */
#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
host1x_uclass_wait_syncpt_32_r()
#endif #endif
...@@ -38,7 +38,7 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, ...@@ -38,7 +38,7 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
total = sizeof(struct host1x_job) + total = sizeof(struct host1x_job) +
(u64)num_relocs * sizeof(struct host1x_reloc) + (u64)num_relocs * sizeof(struct host1x_reloc) +
(u64)num_unpins * sizeof(struct host1x_job_unpin_data) + (u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
(u64)num_cmdbufs * sizeof(struct host1x_job_gather) + (u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
(u64)num_unpins * sizeof(dma_addr_t) + (u64)num_unpins * sizeof(dma_addr_t) +
(u64)num_unpins * sizeof(u32 *); (u64)num_unpins * sizeof(u32 *);
if (total > ULONG_MAX) if (total > ULONG_MAX)
...@@ -57,8 +57,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, ...@@ -57,8 +57,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
mem += num_relocs * sizeof(struct host1x_reloc); mem += num_relocs * sizeof(struct host1x_reloc);
job->unpins = num_unpins ? mem : NULL; job->unpins = num_unpins ? mem : NULL;
mem += num_unpins * sizeof(struct host1x_job_unpin_data); mem += num_unpins * sizeof(struct host1x_job_unpin_data);
job->gathers = num_cmdbufs ? mem : NULL; job->cmds = num_cmdbufs ? mem : NULL;
mem += num_cmdbufs * sizeof(struct host1x_job_gather); mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
job->addr_phys = num_unpins ? mem : NULL; job->addr_phys = num_unpins ? mem : NULL;
job->reloc_addr_phys = job->addr_phys; job->reloc_addr_phys = job->addr_phys;
...@@ -101,22 +101,38 @@ EXPORT_SYMBOL(host1x_job_put); ...@@ -101,22 +101,38 @@ EXPORT_SYMBOL(host1x_job_put);
void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
unsigned int words, unsigned int offset) unsigned int words, unsigned int offset)
{ {
struct host1x_job_gather *gather = &job->gathers[job->num_gathers]; struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;
gather->words = words; gather->words = words;
gather->bo = bo; gather->bo = bo;
gather->offset = offset; gather->offset = offset;
job->num_gathers++; job->num_cmds++;
} }
EXPORT_SYMBOL(host1x_job_add_gather); EXPORT_SYMBOL(host1x_job_add_gather);
/**
 * host1x_job_add_wait() - append a syncpoint wait to a job's command list
 * @job: job whose next free command slot is filled in
 * @id: syncpoint ID recorded in the wait
 * @thresh: threshold recorded in the wait
 * @relative: recorded in the wait's relative flag
 * @next_class: class recorded in the wait's next_class field
 *
 * Marks the entry at index job->num_cmds as a wait, stores the arguments
 * in it and increments job->num_cmds. No capacity check is performed here;
 * NOTE(review): callers presumably must not exceed the command count the
 * job was allocated with - confirm against host1x_job_alloc() callers.
 */
void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
			 bool relative, u32 next_class)
{
	struct host1x_job_cmd *slot = job->cmds + job->num_cmds;
	struct host1x_job_wait *wait = &slot->wait;

	slot->is_wait = true;

	wait->id = id;
	wait->threshold = thresh;
	wait->next_class = next_class;
	wait->relative = relative;

	job->num_cmds += 1;
}
EXPORT_SYMBOL(host1x_job_add_wait);
static unsigned int pin_job(struct host1x *host, struct host1x_job *job) static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{ {
struct host1x_client *client = job->client; struct host1x_client *client = job->client;
struct device *dev = client->dev; struct device *dev = client->dev;
struct host1x_job_gather *g; struct host1x_job_gather *g;
struct iommu_domain *domain; struct iommu_domain *domain;
struct sg_table *sgt;
unsigned int i; unsigned int i;
int err; int err;
...@@ -126,7 +142,6 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) ...@@ -126,7 +142,6 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
for (i = 0; i < job->num_relocs; i++) { for (i = 0; i < job->num_relocs; i++) {
struct host1x_reloc *reloc = &job->relocs[i]; struct host1x_reloc *reloc = &job->relocs[i];
dma_addr_t phys_addr, *phys; dma_addr_t phys_addr, *phys;
struct sg_table *sgt;
reloc->target.bo = host1x_bo_get(reloc->target.bo); reloc->target.bo = host1x_bo_get(reloc->target.bo);
if (!reloc->target.bo) { if (!reloc->target.bo) {
...@@ -202,17 +217,20 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) ...@@ -202,17 +217,20 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
return 0; return 0;
for (i = 0; i < job->num_gathers; i++) { for (i = 0; i < job->num_cmds; i++) {
size_t gather_size = 0; size_t gather_size = 0;
struct scatterlist *sg; struct scatterlist *sg;
struct sg_table *sgt;
dma_addr_t phys_addr; dma_addr_t phys_addr;
unsigned long shift; unsigned long shift;
struct iova *alloc; struct iova *alloc;
dma_addr_t *phys; dma_addr_t *phys;
unsigned int j; unsigned int j;
g = &job->gathers[i]; if (job->cmds[i].is_wait)
continue;
g = &job->cmds[i].gather;
g->bo = host1x_bo_get(g->bo); g->bo = host1x_bo_get(g->bo);
if (!g->bo) { if (!g->bo) {
err = -EINVAL; err = -EINVAL;
...@@ -545,8 +563,13 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job, ...@@ -545,8 +563,13 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job,
fw.num_relocs = job->num_relocs; fw.num_relocs = job->num_relocs;
fw.class = job->class; fw.class = job->class;
for (i = 0; i < job->num_gathers; i++) { for (i = 0; i < job->num_cmds; i++) {
struct host1x_job_gather *g = &job->gathers[i]; struct host1x_job_gather *g;
if (job->cmds[i].is_wait)
continue;
g = &job->cmds[i].gather;
size += g->words * sizeof(u32); size += g->words * sizeof(u32);
} }
...@@ -568,10 +591,14 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job, ...@@ -568,10 +591,14 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job,
job->gather_copy_size = size; job->gather_copy_size = size;
for (i = 0; i < job->num_gathers; i++) { for (i = 0; i < job->num_cmds; i++) {
struct host1x_job_gather *g = &job->gathers[i]; struct host1x_job_gather *g;
void *gather; void *gather;
if (job->cmds[i].is_wait)
continue;
g = &job->cmds[i].gather;
/* Copy the gather */ /* Copy the gather */
gather = host1x_bo_mmap(g->bo); gather = host1x_bo_mmap(g->bo);
memcpy(job->gather_copy_mapped + offset, gather + g->offset, memcpy(job->gather_copy_mapped + offset, gather + g->offset,
...@@ -614,8 +641,12 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) ...@@ -614,8 +641,12 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
} }
/* patch gathers */ /* patch gathers */
for (i = 0; i < job->num_gathers; i++) { for (i = 0; i < job->num_cmds; i++) {
struct host1x_job_gather *g = &job->gathers[i]; struct host1x_job_gather *g;
if (job->cmds[i].is_wait)
continue;
g = &job->cmds[i].gather;
/* process each gather mem only once */ /* process each gather mem only once */
if (g->handled) if (g->handled)
...@@ -625,10 +656,11 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) ...@@ -625,10 +656,11 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
g->base = job->gather_addr_phys[i]; g->base = job->gather_addr_phys[i];
for (j = i + 1; j < job->num_gathers; j++) { for (j = i + 1; j < job->num_cmds; j++) {
if (job->gathers[j].bo == g->bo) { if (!job->cmds[j].is_wait &&
job->gathers[j].handled = true; job->cmds[j].gather.bo == g->bo) {
job->gathers[j].base = g->base; job->cmds[j].gather.handled = true;
job->cmds[j].gather.base = g->base;
} }
} }
......
...@@ -18,6 +18,22 @@ struct host1x_job_gather { ...@@ -18,6 +18,22 @@ struct host1x_job_gather {
bool handled; bool handled;
}; };
/* Parameters of one syncpoint wait stored in a job's command list. */
struct host1x_job_wait {
/* Syncpoint ID passed to submit_wait() when the job is submitted. */
u32 id;
/* Wait threshold; added to the job's syncpoint base when `relative` is set. */
u32 threshold;
/* Class selected via host1x_opcode_setclass() after the wait is pushed. */
u32 next_class;
/* True if `threshold` is an offset from the job's syncpoint base value. */
bool relative;
};
/* One entry of a job's command list: either a gather or a syncpoint wait. */
struct host1x_job_cmd {
/* Selects the active union member: true for `wait`, false for `gather`. */
bool is_wait;
union {
/* Valid when is_wait is false. */
struct host1x_job_gather gather;
/* Valid when is_wait is true. */
struct host1x_job_wait wait;
};
};
struct host1x_job_unpin_data { struct host1x_job_unpin_data {
struct host1x_bo *bo; struct host1x_bo *bo;
struct sg_table *sgt; struct sg_table *sgt;
......
...@@ -218,8 +218,8 @@ struct host1x_job { ...@@ -218,8 +218,8 @@ struct host1x_job {
struct host1x_client *client; struct host1x_client *client;
/* Gathers and their memory */ /* Gathers and their memory */
struct host1x_job_gather *gathers; struct host1x_job_cmd *cmds;
unsigned int num_gathers; unsigned int num_cmds;
/* Array of handles to be pinned & unpinned */ /* Array of handles to be pinned & unpinned */
struct host1x_reloc *relocs; struct host1x_reloc *relocs;
...@@ -278,6 +278,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, ...@@ -278,6 +278,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
u32 num_cmdbufs, u32 num_relocs); u32 num_cmdbufs, u32 num_relocs);
void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
unsigned int words, unsigned int offset); unsigned int words, unsigned int offset);
void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
bool relative, u32 next_class);
struct host1x_job *host1x_job_get(struct host1x_job *job); struct host1x_job *host1x_job_get(struct host1x_job *job);
void host1x_job_put(struct host1x_job *job); void host1x_job_put(struct host1x_job *job);
int host1x_job_pin(struct host1x_job *job, struct device *dev); int host1x_job_pin(struct host1x_job *job, struct device *dev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment