Commit 4d60100a authored by Ben Skeggs

drm/nouveau/fifo: add common channel recovery

That sure was fun to untangle.

- handled per-runlist, rather than globally
- more straight-forward process in general
- various potential SW/HW races have been fixed
- fixes lockdep issues that were present in >=gk104's prior implementation
- volta recovery now actually stands a chance of working
- volta/turing waiting for PBDMA idle before engine reset
- turing using hw-provided TSG info for CTXSW_TIMEOUT
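
For orientation, here is a condensed call-graph sketch of the new path (the names are the real ones from the diff below; flow only, bodies elided):

/*
 * interrupt handlers (e.g. CTXSW_TIMEOUT; see gv100/tu102 below)
 *   -> nvkm_runl_rc_engn(runl, engn)
 *        -> nvkm_engn_cgrp_get()          // cgrp on engine, via cxid() hook
 *        -> nvkm_runl_rc_cgrp(cgrp)       // cgrp->rc: NONE -> PENDING
 *             -> nvkm_runl_rc_runl(runl)  // block + preempt runlist,
 *                                         // rc_pending++, schedule_work()
 *
 * nvkm_runl_work() -> nvkm_runl_rc(runl)  // under runl->mutex
 *   1. cgrp->rc: PENDING -> RUNNING; kill its channels (nvkm_chan_error)
 *   2. if runl->func->preempt: poll runq->func->idle() for up to 2ms
 *   3. reset engines still bound to a RUNNING cgrp (subdev fini/init)
 *   4. commit runlist update, runl->func->fault_clear(), nvkm_runl_allow()
 */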
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
parent 0b1bb129
@@ -93,11 +93,12 @@ static int
 nvkm_fifo_fini(struct nvkm_engine *engine, bool suspend)
 {
        struct nvkm_fifo *fifo = nvkm_fifo(engine);
+       struct nvkm_runl *runl;

        nvkm_inth_block(&fifo->engine.subdev.inth);

-       if (fifo->func->fini)
-               fifo->func->fini(fifo);
+       nvkm_runl_foreach(runl, fifo)
+               nvkm_runl_fini(runl);
+
        return 0;
 }
...
@@ -168,6 +168,18 @@ nvkm_cgrp_ref(struct nvkm_cgrp *cgrp)
        return cgrp;
 }

+void
+nvkm_cgrp_put(struct nvkm_cgrp **pcgrp, unsigned long irqflags)
+{
+       struct nvkm_cgrp *cgrp = *pcgrp;
+
+       if (!cgrp)
+               return;
+
+       *pcgrp = NULL;
+       spin_unlock_irqrestore(&cgrp->lock, irqflags);
+}
+
 int
 nvkm_cgrp_new(struct nvkm_runl *runl, const char *name, struct nvkm_vmm *vmm, bool hw,
               struct nvkm_cgrp **pcgrp)
@@ -190,6 +202,7 @@ nvkm_cgrp_new(struct nvkm_runl *runl, const char *name, struct nvkm_vmm *vmm, bool hw,
        INIT_LIST_HEAD(&cgrp->ectxs);
        INIT_LIST_HEAD(&cgrp->vctxs);
        mutex_init(&cgrp->mutex);
+       atomic_set(&cgrp->rc, NVKM_CGRP_RC_NONE);

        if (runl->cgid) {
                cgrp->id = nvkm_chid_get(runl->cgid, cgrp);
...
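
The new nvkm_cgrp_put() is one half of a lookup/put protocol: the lookup helpers added in runl.c below return with cgrp->lock held and interrupts disabled, and the put drops the lock, restores the saved irqflags and NULLs the caller's pointer. A minimal usage sketch, condensed from nvkm_runl_rc_engn() later in this diff:

        struct nvkm_cgrp *cgrp;
        unsigned long flags;

        /* Returns with cgrp->lock held and IRQs saved in flags, or NULL. */
        cgrp = nvkm_engn_cgrp_get(engn, &flags);
        if (cgrp) {
                nvkm_runl_rc_cgrp(cgrp);      /* cgrp can't be freed here */
                nvkm_cgrp_put(&cgrp, flags);  /* unlocks; cgrp is now NULL */
        }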
@@ -40,6 +40,11 @@ struct nvkm_cgrp {
        struct list_head vctxs;
        struct mutex mutex;

+#define NVKM_CGRP_RC_NONE    0
+#define NVKM_CGRP_RC_PENDING 1
+#define NVKM_CGRP_RC_RUNNING 2
+       atomic_t rc;
+
        struct list_head head;
        struct list_head chan;
 };
@@ -52,6 +57,12 @@ int nvkm_cgrp_vctx_get(struct nvkm_cgrp *, struct nvkm_engn *, struct nvkm_chan *,
                        struct nvkm_vctx **, struct nvkm_client *);
 void nvkm_cgrp_vctx_put(struct nvkm_cgrp *, struct nvkm_vctx **);

+void nvkm_cgrp_put(struct nvkm_cgrp **, unsigned long irqflags);
+
+#define nvkm_cgrp_foreach_chan(chan,cgrp) for ((chan) = (cgrp)->chans; (chan); (chan) = NULL)
+#define nvkm_cgrp_foreach_chan_safe(chan,ctmp,cgrp) \
+       (void)(ctmp); nvkm_cgrp_foreach_chan((chan), (cgrp))
+
 #define CGRP_PRCLI(c,l,p,f,a...) RUNL_PRINT((c)->runl, l, p, "%04x:[%s]"f, (c)->id, (c)->name, ##a)
 #define CGRP_PRINT(c,l,p,f,a...) RUNL_PRINT((c)->runl, l, p, "%04x:"f, (c)->id, ##a)
 #define CGRP_ERROR(c,f,a...) CGRP_PRCLI((c), ERROR, err, " "f"\n", ##a)
...
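
The atomic rc field above is a one-way, two-step state machine per recovery cycle: NONE -> PENDING when recovery is scheduled, PENDING -> RUNNING once the worker picks the group up. Both transitions go through atomic_cmpxchg(), so any number of concurrent faults against the same group collapse into a single recovery pass. The scheduling side, as it appears in nvkm_runl_rc_cgrp() later in this diff:

        /* Only the first caller moves the cgrp out of NONE and schedules
         * recovery; concurrent faults on the same group become no-ops.
         */
        if (atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_NONE,
                           NVKM_CGRP_RC_PENDING) != NVKM_CGRP_RC_NONE)
                return;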
@@ -12,11 +12,6 @@ struct gf100_fifo {
        struct list_head chan;

-       struct {
-               struct work_struct work;
-               u64 mask;
-       } recover;
-
        struct {
                struct nvkm_memory *mem[2];
                int active;
...
@@ -14,16 +14,8 @@ struct gk104_fifo {
        const struct gk104_fifo_func *func;
        struct nvkm_fifo base;

-       struct {
-               struct work_struct work;
-               u32 engm;
-               u32 runm;
-       } recover;
-
        struct {
                struct nvkm_engine *engine;
-               int runl;
-               int pbid;
        } engine[16];
        int engine_nr;
@@ -43,29 +35,14 @@ struct gk104_fifo {
        } user;
 };

-struct gk104_fifo_engine_status {
-       bool busy;
-       bool faulted;
-       bool chsw;
-       bool save;
-       bool load;
-       struct {
-               bool tsg;
-               u32 id;
-       } prev, next, *chan;
-};
-
 int gk104_fifo_new_(const struct gk104_fifo_func *, struct nvkm_device *, enum nvkm_subdev_type,
                     int index, int nr, struct nvkm_fifo **);
 void gk104_fifo_runlist_insert(struct gk104_fifo *, struct gk104_fifo_chan *);
 void gk104_fifo_runlist_remove(struct gk104_fifo *, struct gk104_fifo_chan *);
 void gk104_fifo_runlist_update(struct gk104_fifo *, int runl);
-void gk104_fifo_engine_status(struct gk104_fifo *fifo, int engn,
-                              struct gk104_fifo_engine_status *status);

 void *gk104_fifo_dtor(struct nvkm_fifo *base);
 int gk104_fifo_oneinit(struct nvkm_fifo *);
 void gk104_fifo_init(struct nvkm_fifo *base);
-void gk104_fifo_fini(struct nvkm_fifo *base);

 extern const struct gk104_fifo_runlist_func gk104_fifo_runlist;
 void gk104_fifo_runlist_chan(struct gk104_fifo_chan *,
...
@@ -90,6 +90,7 @@ gk110_runl = {
        .pending = gk104_runl_pending,
        .block = gk104_runl_block,
        .allow = gk104_runl_allow,
+       .fault_clear = gk104_runl_fault_clear,
        .preempt_pending = gf100_runl_preempt_pending,
 };
@@ -115,12 +116,11 @@ gk110_fifo = {
        .runl_ctor = gk104_fifo_runl_ctor,
        .init = gk104_fifo_init,
        .init_pbdmas = gk104_fifo_init_pbdmas,
-       .fini = gk104_fifo_fini,
        .intr = gk104_fifo_intr,
        .intr_mmu_fault_unit = gf100_fifo_intr_mmu_fault_unit,
+       .intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout,
        .mmu_fault = &gk104_fifo_mmu_fault,
        .engine_id = gk104_fifo_engine_id,
-       .recover_chan = gk104_fifo_recover_chan,
        .runlist = &gk110_fifo_runlist,
        .nonstall = &gf100_fifo_nonstall,
        .runl = &gk110_runl,
...
@@ -40,6 +40,7 @@ gk208_runq = {
        .init = gk208_runq_init,
        .intr = gk104_runq_intr,
        .intr_0_names = gk104_runq_intr_0_names,
+       .idle = gk104_runq_idle,
 };

 static int
@@ -58,12 +59,11 @@ gk208_fifo = {
        .runl_ctor = gk104_fifo_runl_ctor,
        .init = gk104_fifo_init,
        .init_pbdmas = gk104_fifo_init_pbdmas,
-       .fini = gk104_fifo_fini,
        .intr = gk104_fifo_intr,
        .intr_mmu_fault_unit = gf100_fifo_intr_mmu_fault_unit,
+       .intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout,
        .mmu_fault = &gk104_fifo_mmu_fault,
        .engine_id = gk104_fifo_engine_id,
-       .recover_chan = gk104_fifo_recover_chan,
        .runlist = &gk110_fifo_runlist,
        .nonstall = &gf100_fifo_nonstall,
        .runl = &gk110_runl,
...
@@ -34,12 +34,11 @@ gk20a_fifo = {
        .runl_ctor = gk104_fifo_runl_ctor,
        .init = gk104_fifo_init,
        .init_pbdmas = gk104_fifo_init_pbdmas,
-       .fini = gk104_fifo_fini,
        .intr = gk104_fifo_intr,
        .intr_mmu_fault_unit = gf100_fifo_intr_mmu_fault_unit,
+       .intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout,
        .mmu_fault = &gk104_fifo_mmu_fault,
        .engine_id = gk104_fifo_engine_id,
-       .recover_chan = gk104_fifo_recover_chan,
        .runlist = &gk110_fifo_runlist,
        .nonstall = &gf100_fifo_nonstall,
        .runl = &gk110_runl,
...
@@ -63,6 +63,7 @@ gm107_runl = {
        .pending = gk104_runl_pending,
        .block = gk104_runl_block,
        .allow = gk104_runl_allow,
+       .fault_clear = gk104_runl_fault_clear,
        .preempt_pending = gf100_runl_preempt_pending,
 };
@@ -139,12 +140,11 @@ gm107_fifo = {
        .runl_ctor = gk104_fifo_runl_ctor,
        .init = gk104_fifo_init,
        .init_pbdmas = gk104_fifo_init_pbdmas,
-       .fini = gk104_fifo_fini,
        .intr = gk104_fifo_intr,
        .intr_mmu_fault_unit = gm107_fifo_intr_mmu_fault_unit,
+       .intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout,
        .mmu_fault = &gm107_fifo_mmu_fault,
        .engine_id = gk104_fifo_engine_id,
-       .recover_chan = gk104_fifo_recover_chan,
        .runlist = &gm107_fifo_runlist,
        .nonstall = &gf100_fifo_nonstall,
        .runl = &gm107_runl,
...
@@ -48,12 +48,11 @@ gm200_fifo = {
        .runl_ctor = gk104_fifo_runl_ctor,
        .init = gk104_fifo_init,
        .init_pbdmas = gk104_fifo_init_pbdmas,
-       .fini = gk104_fifo_fini,
        .intr = gk104_fifo_intr,
        .intr_mmu_fault_unit = gm107_fifo_intr_mmu_fault_unit,
+       .intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout,
        .mmu_fault = &gm107_fifo_mmu_fault,
        .engine_id = gk104_fifo_engine_id,
-       .recover_chan = gk104_fifo_recover_chan,
        .runlist = &gm107_fifo_runlist,
        .nonstall = &gf100_fifo_nonstall,
        .runl = &gm107_runl,
...
@@ -35,6 +35,7 @@ gp100_runl = {
        .pending = gk104_runl_pending,
        .block = gk104_runl_block,
        .allow = gk104_runl_allow,
+       .fault_clear = gk104_runl_fault_clear,
        .preempt_pending = gf100_runl_preempt_pending,
 };
@@ -106,12 +107,11 @@ gp100_fifo = {
        .runl_ctor = gk104_fifo_runl_ctor,
        .init = gk104_fifo_init,
        .init_pbdmas = gk104_fifo_init_pbdmas,
-       .fini = gk104_fifo_fini,
        .intr = gk104_fifo_intr,
        .intr_mmu_fault_unit = gp100_fifo_intr_mmu_fault_unit,
+       .intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout,
        .mmu_fault = &gp100_fifo_mmu_fault,
        .engine_id = gk104_fifo_engine_id,
-       .recover_chan = gk104_fifo_recover_chan,
        .runlist = &gm107_fifo_runlist,
        .nonstall = &gf100_fifo_nonstall,
        .runl = &gp100_runl,
...
@@ -20,6 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 #include "chan.h"
+#include "chid.h"
 #include "cgrp.h"
 #include "runl.h"
 #include "runq.h"
@@ -49,10 +50,14 @@ gv100_chan = {

 const struct nvkm_engn_func
 gv100_engn = {
+       .chsw = gk104_engn_chsw,
+       .cxid = gk104_engn_cxid,
 };

 const struct nvkm_engn_func
 gv100_engn_ce = {
+       .chsw = gk104_engn_chsw,
+       .cxid = gk104_engn_cxid,
 };

 static bool
@@ -83,8 +88,15 @@ gv100_runq = {
        .intr = gk104_runq_intr,
        .intr_0_names = gk104_runq_intr_0_names,
        .intr_1_ctxnotvalid = gv100_runq_intr_1_ctxnotvalid,
+       .idle = gk104_runq_idle,
 };

+void
+gv100_runl_preempt(struct nvkm_runl *runl)
+{
+       nvkm_wr32(runl->fifo->engine.subdev.device, 0x002638, BIT(runl->id));
+}
+
 void
 gv100_fifo_runlist_chan(struct gk104_fifo_chan *chan,
                         struct nvkm_memory *memory, u32 offset)
@@ -123,6 +135,7 @@ gv100_runl = {
        .pending = gk104_runl_pending,
        .block = gk104_runl_block,
        .allow = gk104_runl_allow,
+       .preempt = gv100_runl_preempt,
        .preempt_pending = gf100_runl_preempt_pending,
 };
@@ -362,6 +375,18 @@ gv100_fifo_mmu_fault = {
        .gpcclient = gv100_fifo_mmu_fault_gpcclient,
 };

+static void
+gv100_fifo_intr_ctxsw_timeout(struct nvkm_fifo *fifo, u32 engm)
+{
+       struct nvkm_runl *runl;
+       struct nvkm_engn *engn;
+
+       nvkm_runl_foreach(runl, fifo) {
+               nvkm_runl_foreach_engn_cond(engn, runl, engm & BIT(engn->id))
+                       nvkm_runl_rc_engn(runl, engn);
+       }
+}
+
 static const struct nvkm_fifo_func
 gv100_fifo = {
        .dtor = gk104_fifo_dtor,
@@ -372,11 +397,10 @@ gv100_fifo = {
        .runl_ctor = gk104_fifo_runl_ctor,
        .init = gk104_fifo_init,
        .init_pbdmas = gk104_fifo_init_pbdmas,
-       .fini = gk104_fifo_fini,
        .intr = gk104_fifo_intr,
+       .intr_ctxsw_timeout = gv100_fifo_intr_ctxsw_timeout,
        .mmu_fault = &gv100_fifo_mmu_fault,
        .engine_id = gk104_fifo_engine_id,
-       .recover_chan = gk104_fifo_recover_chan,
        .runlist = &gv100_fifo_runlist,
        .nonstall = &gf100_fifo_nonstall,
        .runl = &gv100_runl,
...
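
gv100_fifo_intr_ctxsw_timeout() above is where the per-runlist design from the commit message shows up: instead of recovering engines against global state, the handler walks each runlist and schedules recovery only for engines whose bit is set. A worked example with a hypothetical mask value:

        /* Hypothetical: engm = 0x00000009, i.e. engines with id 0 and id 3
         * reported CTXSW_TIMEOUT.  Every runlist is visited, but on each one
         * the inner loop's condition (engm & BIT(engn->id)) passes only for
         * those two ids, so only they reach nvkm_runl_rc_engn().
         */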
@@ -5,6 +5,7 @@
 #include <engine/fifo.h>
 #include <core/enum.h>
 struct nvkm_cgrp;
+struct nvkm_engn;
 struct nvkm_memory;
 struct nvkm_runl;
 struct nvkm_runq;
@@ -24,10 +25,9 @@ struct nvkm_fifo_func {
        void (*init)(struct nvkm_fifo *);
        void (*init_pbdmas)(struct nvkm_fifo *, u32 mask);
-
-       void (*fini)(struct nvkm_fifo *);

        irqreturn_t (*intr)(struct nvkm_inth *);
        void (*intr_mmu_fault_unit)(struct nvkm_fifo *, int unit);
+       void (*intr_ctxsw_timeout)(struct nvkm_fifo *, u32 engm);

        const struct nvkm_fifo_func_mmu_fault {
                void (*recover)(struct nvkm_fifo *, struct nvkm_fault_data *);
@@ -41,7 +41,6 @@ struct nvkm_fifo_func {
        int (*engine_id)(struct nvkm_fifo *, struct nvkm_engine *);
        void (*pause)(struct nvkm_fifo *, unsigned long *);
        void (*start)(struct nvkm_fifo *, unsigned long *);
-       void (*recover_chan)(struct nvkm_fifo *, int chid);

        const struct gk104_fifo_runlist_func {
                u8 size;
@@ -116,12 +115,16 @@ int gf100_fifo_runq_nr(struct nvkm_fifo *);
 bool gf100_fifo_intr_pbdma(struct nvkm_fifo *);
 void gf100_fifo_intr_mmu_fault(struct nvkm_fifo *);
 void gf100_fifo_intr_mmu_fault_unit(struct nvkm_fifo *, int);
+void gf100_fifo_intr_sched(struct nvkm_fifo *);
+void gf100_fifo_intr_ctxsw_timeout(struct nvkm_fifo *, u32);
 void gf100_fifo_mmu_fault_recover(struct nvkm_fifo *, struct nvkm_fault_data *);
 extern const struct nvkm_enum gf100_fifo_mmu_fault_access[];
 extern const struct nvkm_event_func gf100_fifo_nonstall;
 bool gf100_runl_preempt_pending(struct nvkm_runl *);
 void gf100_runq_init(struct nvkm_runq *);
 bool gf100_runq_intr(struct nvkm_runq *, struct nvkm_runl *);
+void gf100_engn_mmu_fault_trigger(struct nvkm_engn *);
+bool gf100_engn_mmu_fault_triggered(struct nvkm_engn *);
 extern const struct nvkm_engn_func gf100_engn_sw;
 void gf100_chan_preempt(struct nvkm_chan *);
@@ -136,16 +139,19 @@ extern const struct nvkm_fifo_func_mmu_fault gk104_fifo_mmu_fault;
 extern const struct nvkm_enum gk104_fifo_mmu_fault_reason[];
 extern const struct nvkm_enum gk104_fifo_mmu_fault_hubclient[];
 extern const struct nvkm_enum gk104_fifo_mmu_fault_gpcclient[];
-void gk104_fifo_recover_chan(struct nvkm_fifo *, int);
 int gk104_fifo_engine_id(struct nvkm_fifo *, struct nvkm_engine *);
 bool gk104_runl_pending(struct nvkm_runl *);
 void gk104_runl_block(struct nvkm_runl *, u32);
 void gk104_runl_allow(struct nvkm_runl *, u32);
+void gk104_runl_fault_clear(struct nvkm_runl *);
 extern const struct nvkm_runq_func gk104_runq;
 void gk104_runq_init(struct nvkm_runq *);
 bool gk104_runq_intr(struct nvkm_runq *, struct nvkm_runl *);
 extern const struct nvkm_bitfield gk104_runq_intr_0_names[];
+bool gk104_runq_idle(struct nvkm_runq *);
 extern const struct nvkm_engn_func gk104_engn;
+bool gk104_engn_chsw(struct nvkm_engn *);
+int gk104_engn_cxid(struct nvkm_engn *, bool *cgid);
 extern const struct nvkm_engn_func gk104_engn_ce;
 void gk104_chan_bind(struct nvkm_chan *);
 void gk104_chan_bind_inst(struct nvkm_chan *);
@@ -174,10 +180,12 @@ extern const struct nvkm_enum gv100_fifo_mmu_fault_access[];
 extern const struct nvkm_enum gv100_fifo_mmu_fault_reason[];
 extern const struct nvkm_enum gv100_fifo_mmu_fault_hubclient[];
 extern const struct nvkm_enum gv100_fifo_mmu_fault_gpcclient[];
+void gv100_runl_preempt(struct nvkm_runl *);
 extern const struct nvkm_runq_func gv100_runq;
 extern const struct nvkm_engn_func gv100_engn;
 extern const struct nvkm_engn_func gv100_engn_ce;

+void tu102_fifo_intr_ctxsw_timeout_info(struct nvkm_engn *, u32 info);
 extern const struct nvkm_fifo_func_mmu_fault tu102_fifo_mmu_fault;

 int nvkm_uchan_new(struct nvkm_fifo *, struct nvkm_cgrp *, const struct nvkm_oclass *,
...
@@ -24,11 +24,164 @@
 #include "chan.h"
 #include "chid.h"
 #include "priv.h"
+#include "runq.h"

 #include <core/gpuobj.h>
 #include <subdev/timer.h>
 #include <subdev/top.h>

+struct nvkm_cgrp *
+nvkm_engn_cgrp_get(struct nvkm_engn *engn, unsigned long *pirqflags)
+{
+       struct nvkm_cgrp *cgrp = NULL;
+       struct nvkm_chan *chan;
+       bool cgid;
+       int id;
+
+       id = engn->func->cxid(engn, &cgid);
+       if (id < 0)
+               return NULL;
+
+       if (!cgid) {
+               chan = nvkm_runl_chan_get_chid(engn->runl, id, pirqflags);
+               if (chan)
+                       cgrp = chan->cgrp;
+       } else {
+               cgrp = nvkm_runl_cgrp_get_cgid(engn->runl, id, pirqflags);
+       }
+
+       WARN_ON(!cgrp);
+       return cgrp;
+}
+
+#include "gf100.h"
+#include "gk104.h"
+
+static void
+nvkm_runl_rc(struct nvkm_runl *runl)
+{
+       struct nvkm_fifo *fifo = runl->fifo;
+       struct nvkm_cgrp *cgrp, *gtmp;
+       struct nvkm_chan *chan, *ctmp;
+       struct nvkm_engn *engn;
+       unsigned long flags;
+       int rc, state, i;
+       bool reset;
+
+       /* Runlist is blocked before scheduling recovery - fetch count. */
+       BUG_ON(!mutex_is_locked(&runl->mutex));
+       rc = atomic_xchg(&runl->rc_pending, 0);
+       if (!rc)
+               return;
+
+       /* Look for channel groups flagged for RC. */
+       nvkm_runl_foreach_cgrp_safe(cgrp, gtmp, runl) {
+               state = atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_PENDING, NVKM_CGRP_RC_RUNNING);
+               if (state == NVKM_CGRP_RC_PENDING) {
+                       /* Disable all channels in them, and remove from runlist. */
+                       nvkm_cgrp_foreach_chan_safe(chan, ctmp, cgrp)
+                               nvkm_chan_error(chan, false);
+               }
+       }
+
+       /* On GPUs with runlist preempt, wait for PBDMA(s) servicing runlist to go idle. */
+       if (runl->func->preempt) {
+               for (i = 0; i < runl->runq_nr; i++) {
+                       struct nvkm_runq *runq = runl->runq[i];
+
+                       if (runq) {
+                               nvkm_msec(fifo->engine.subdev.device, 2000,
+                                       if (runq->func->idle(runq))
+                                               break;
+                               );
+                       }
+               }
+       }
+
+       /* Look for engines that are still on flagged channel groups - reset them. */
+       nvkm_runl_foreach_engn_cond(engn, runl, engn->func->cxid) {
+               cgrp = nvkm_engn_cgrp_get(engn, &flags);
+               if (!cgrp) {
+                       ENGN_DEBUG(engn, "cxid not valid");
+                       continue;
+               }
+
+               reset = atomic_read(&cgrp->rc) == NVKM_CGRP_RC_RUNNING;
+               nvkm_cgrp_put(&cgrp, flags);
+               if (!reset) {
+                       ENGN_DEBUG(engn, "cxid not in recovery");
+                       continue;
+               }
+
+               ENGN_DEBUG(engn, "resetting...");
+               nvkm_subdev_fini(&engn->engine->subdev, false);
+               WARN_ON(nvkm_subdev_init(&engn->engine->subdev));
+       }
+
+       /* Submit runlist update, and clear any remaining exception state. */
+       if (runl->fifo->engine.subdev.device->card_type < NV_E0)
+               gf100_fifo_runlist_commit(gf100_fifo(runl->fifo));
+       else
+               gk104_fifo_runlist_update(gk104_fifo(runl->fifo), runl->id);
+       if (runl->func->fault_clear)
+               runl->func->fault_clear(runl);
+
+       /* Unblock runlist processing. */
+       while (rc--)
+               nvkm_runl_allow(runl);
+}
+
+static void
+nvkm_runl_rc_runl(struct nvkm_runl *runl)
+{
+       RUNL_ERROR(runl, "rc scheduled");
+
+       nvkm_runl_block(runl);
+       if (runl->func->preempt)
+               runl->func->preempt(runl);
+
+       atomic_inc(&runl->rc_pending);
+       schedule_work(&runl->work);
+}
+
+void
+nvkm_runl_rc_cgrp(struct nvkm_cgrp *cgrp)
+{
+       if (atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_NONE, NVKM_CGRP_RC_PENDING) != NVKM_CGRP_RC_NONE)
+               return;
+
+       CGRP_ERROR(cgrp, "rc scheduled");
+       nvkm_runl_rc_runl(cgrp->runl);
+}
+
+void
+nvkm_runl_rc_engn(struct nvkm_runl *runl, struct nvkm_engn *engn)
+{
+       struct nvkm_cgrp *cgrp;
+       unsigned long flags;
+
+       /* Lookup channel group currently on engine. */
+       cgrp = nvkm_engn_cgrp_get(engn, &flags);
+       if (!cgrp) {
+               ENGN_DEBUG(engn, "rc skipped, not on channel");
+               return;
+       }
+
+       nvkm_runl_rc_cgrp(cgrp);
+       nvkm_cgrp_put(&cgrp, flags);
+}
+
+static void
+nvkm_runl_work(struct work_struct *work)
+{
+       struct nvkm_runl *runl = container_of(work, typeof(*runl), work);
+
+       mutex_lock(&runl->mutex);
+       nvkm_runl_rc(runl);
+       mutex_unlock(&runl->mutex);
+}
+
 struct nvkm_chan *
 nvkm_runl_chan_get_inst(struct nvkm_runl *runl, u64 inst, unsigned long *pirqflags)
 {
@@ -74,6 +227,27 @@ nvkm_runl_chan_get_chid(struct nvkm_runl *runl, int id, unsigned long *pirqflags)
        return NULL;
 }

+struct nvkm_cgrp *
+nvkm_runl_cgrp_get_cgid(struct nvkm_runl *runl, int id, unsigned long *pirqflags)
+{
+       struct nvkm_chid *cgid = runl->cgid;
+       struct nvkm_cgrp *cgrp;
+       unsigned long flags;
+
+       spin_lock_irqsave(&cgid->lock, flags);
+       if (!WARN_ON(id >= cgid->nr)) {
+               cgrp = cgid->data[id];
+               if (likely(cgrp)) {
+                       spin_lock(&cgrp->lock);
+                       *pirqflags = flags;
+                       spin_unlock(&cgid->lock);
+                       return cgrp;
+               }
+       }
+       spin_unlock_irqrestore(&cgid->lock, flags);
+       return NULL;
+}
+
 int
 nvkm_runl_preempt_wait(struct nvkm_runl *runl)
 {
@@ -81,6 +255,7 @@ nvkm_runl_preempt_wait(struct nvkm_runl *runl)
                if (!runl->func->preempt_pending(runl))
                        break;

+               nvkm_runl_rc(runl);
                usleep_range(1, 2);
        ) < 0 ? -ETIMEDOUT : 0;
 }
@@ -91,6 +266,7 @@ nvkm_runl_update_pending(struct nvkm_runl *runl)
        if (!runl->func->pending(runl))
                return false;

+       nvkm_runl_rc(runl);
        return true;
 }
@@ -122,6 +298,12 @@ nvkm_runl_block(struct nvkm_runl *runl)
        spin_unlock_irqrestore(&fifo->lock, flags);
 }

+void
+nvkm_runl_fini(struct nvkm_runl *runl)
+{
+       flush_work(&runl->work);
+}
+
 void
 nvkm_runl_del(struct nvkm_runl *runl)
 {
@@ -214,6 +396,9 @@ nvkm_runl_new(struct nvkm_fifo *fifo, int runi, u32 addr, int id_nr)
        INIT_LIST_HEAD(&runl->engns);
        INIT_LIST_HEAD(&runl->cgrps);
        mutex_init(&runl->mutex);
+       INIT_WORK(&runl->work, nvkm_runl_work);
+       atomic_set(&runl->rc_triggered, 0);
+       atomic_set(&runl->rc_pending, 0);
        list_add_tail(&runl->head, &fifo->runls);

        if (!fifo->chid) {
...
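
One subtlety in nvkm_runl_cgrp_get_cgid() above (and its chan_get_chid() sibling) is the hand-over-hand locking: the id-table lock pins the id->cgrp mapping just long enough to take the cgrp's own lock, then is dropped while the saved irqflags travel to the caller with the cgrp. A sketch of the sequence, using only what the function itself does:

        spin_lock_irqsave(&cgid->lock, flags);  /* pin the id -> cgrp table   */
        spin_lock(&cgrp->lock);                 /* pin the cgrp itself        */
        *pirqflags = flags;                     /* caller inherits irq state  */
        spin_unlock(&cgid->lock);               /* table free; cgrp still held */
        /* ... caller uses cgrp, then nvkm_cgrp_put() performs the matching
         * spin_unlock_irqrestore(&cgrp->lock, irqflags) half. */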
@@ -7,6 +7,10 @@ enum nvkm_subdev_type;

 struct nvkm_engn {
        const struct nvkm_engn_func {
+               bool (*chsw)(struct nvkm_engn *);
+               int (*cxid)(struct nvkm_engn *, bool *cgid);
+               void (*mmu_fault_trigger)(struct nvkm_engn *);
+               bool (*mmu_fault_triggered)(struct nvkm_engn *);
        } *func;
        struct nvkm_runl *runl;
        int id;
@@ -28,6 +32,8 @@ struct nvkm_runl {
                bool (*pending)(struct nvkm_runl *);
                void (*block)(struct nvkm_runl *, u32 engm);
                void (*allow)(struct nvkm_runl *, u32 engm);
+               void (*fault_clear)(struct nvkm_runl *);
+               void (*preempt)(struct nvkm_runl *);
                bool (*preempt_pending)(struct nvkm_runl *);
        } *func;
        struct nvkm_fifo *fifo;
@@ -50,6 +56,10 @@ struct nvkm_runl {
        int blocked;

+       struct work_struct work;
+       atomic_t rc_triggered;
+       atomic_t rc_pending;
+
        struct list_head head;
 };
@@ -58,11 +68,16 @@ struct nvkm_runl *nvkm_runl_get(struct nvkm_fifo *, int runi, u32 addr);
 struct nvkm_engn *nvkm_runl_add(struct nvkm_runl *, int engi, const struct nvkm_engn_func *,
                                 enum nvkm_subdev_type, int inst);
 void nvkm_runl_del(struct nvkm_runl *);
+void nvkm_runl_fini(struct nvkm_runl *);
 void nvkm_runl_block(struct nvkm_runl *);
 void nvkm_runl_allow(struct nvkm_runl *);
 bool nvkm_runl_update_pending(struct nvkm_runl *);
 int nvkm_runl_preempt_wait(struct nvkm_runl *);
+void nvkm_runl_rc_engn(struct nvkm_runl *, struct nvkm_engn *);
+void nvkm_runl_rc_cgrp(struct nvkm_cgrp *);
+
+struct nvkm_cgrp *nvkm_runl_cgrp_get_cgid(struct nvkm_runl *, int cgid, unsigned long *irqflags);
 struct nvkm_chan *nvkm_runl_chan_get_chid(struct nvkm_runl *, int chid, unsigned long *irqflags);
 struct nvkm_chan *nvkm_runl_chan_get_inst(struct nvkm_runl *, u64 inst, unsigned long *irqflags);
@@ -74,6 +89,9 @@ struct nvkm_chan *nvkm_runl_chan_get_inst(struct nvkm_runl *, u64 inst, unsigned long *irqflags);
 #define nvkm_runl_foreach_engn(engn,runl) list_for_each_entry((engn), &(runl)->engns, head)
 #define nvkm_runl_foreach_engn_cond(engn,runl,cond) \
        nvkm_list_foreach(engn, &(runl)->engns, head, (cond))
+#define nvkm_runl_foreach_cgrp(cgrp,runl) list_for_each_entry((cgrp), &(runl)->cgrps, head)
+#define nvkm_runl_foreach_cgrp_safe(cgrp,gtmp,runl) \
+       list_for_each_entry_safe((cgrp), (gtmp), &(runl)->cgrps, head)
 #define RUNL_PRINT(r,l,p,f,a...) \
        nvkm_printk__(&(r)->fifo->engine.subdev, NV_DBG_##l, p, "%06x:"f, (r)->addr, ##a)
...
@@ -10,6 +10,7 @@ struct nvkm_runq {
                bool (*intr)(struct nvkm_runq *, struct nvkm_runl *);
                const struct nvkm_bitfield *intr_0_names;
                bool (*intr_1_ctxnotvalid)(struct nvkm_runq *, int chid);
+               bool (*idle)(struct nvkm_runq *);
        } *func;
        struct nvkm_fifo *fifo;
        int id;
...
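
The new idle hook is what implements the "volta/turing waiting for PBDMA idle before engine reset" bullet from the commit message: on preempt-capable runlists, nvkm_runl_rc() polls every attached runq before resetting engines. The loop from runl.c above, for reference:

        /* Give each PBDMA feeding the runlist up to 2ms to drain. */
        nvkm_msec(fifo->engine.subdev.device, 2000,
                if (runq->func->idle(runq))
                        break;
        );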
@@ -92,6 +92,7 @@ tu102_runl = {
        .pending = tu102_runl_pending,
        .block = gk104_runl_block,
        .allow = gk104_runl_allow,
+       .preempt = gv100_runl_preempt,
        .preempt_pending = gf100_runl_preempt_pending,
 };
@@ -123,155 +124,6 @@ tu102_fifo_mmu_fault_engine[] = {
        {}
 };

-static void
-tu102_fifo_recover_work(struct work_struct *w)
-{
-       struct gk104_fifo *fifo = container_of(w, typeof(*fifo), recover.work);
-       struct nvkm_device *device = fifo->base.engine.subdev.device;
-       struct nvkm_engine *engine;
-       unsigned long flags;
-       u32 engm, runm, todo;
-       int engn, runl;
-
-       spin_lock_irqsave(&fifo->base.lock, flags);
-       runm = fifo->recover.runm;
-       engm = fifo->recover.engm;
-       fifo->recover.engm = 0;
-       fifo->recover.runm = 0;
-       spin_unlock_irqrestore(&fifo->base.lock, flags);
-
-       nvkm_mask(device, 0x002630, runm, runm);
-
-       for (todo = engm; engn = __ffs(todo), todo; todo &= ~BIT(engn)) {
-               if ((engine = fifo->engine[engn].engine)) {
-                       nvkm_subdev_fini(&engine->subdev, false);
-                       WARN_ON(nvkm_subdev_init(&engine->subdev));
-               }
-       }
-
-       for (todo = runm; runl = __ffs(todo), todo; todo &= ~BIT(runl))
-               gk104_fifo_runlist_update(fifo, runl);
-
-       nvkm_mask(device, 0x002630, runm, 0x00000000);
-}
-
-static void tu102_fifo_recover_engn(struct gk104_fifo *fifo, int engn);
-
-static void
-tu102_fifo_recover_runl(struct gk104_fifo *fifo, int runl)
-{
-       struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-       struct nvkm_device *device = subdev->device;
-       const u32 runm = BIT(runl);
-
-       assert_spin_locked(&fifo->base.lock);
-       if (fifo->recover.runm & runm)
-               return;
-       fifo->recover.runm |= runm;
-
-       /* Block runlist to prevent channel assignment(s) from changing. */
-       nvkm_mask(device, 0x002630, runm, runm);
-
-       /* Schedule recovery. */
-       nvkm_warn(subdev, "runlist %d: scheduled for recovery\n", runl);
-       schedule_work(&fifo->recover.work);
-}
-
-static struct gk104_fifo_chan *
-tu102_fifo_recover_chid(struct gk104_fifo *fifo, int runl, int chid)
-{
-       struct gk104_fifo_chan *chan;
-       struct nvkm_fifo_cgrp *cgrp;
-
-       list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
-               if (chan->base.chid == chid) {
-                       list_del_init(&chan->head);
-                       return chan;
-               }
-       }
-
-       list_for_each_entry(cgrp, &fifo->runlist[runl].cgrp, head) {
-               if (cgrp->id == chid) {
-                       chan = list_first_entry(&cgrp->chan, typeof(*chan), head);
-                       list_del_init(&chan->head);
-                       if (!--cgrp->chan_nr)
-                               list_del_init(&cgrp->head);
-                       return chan;
-               }
-       }
-
-       return NULL;
-}
-
-static void
-tu102_fifo_recover_chan(struct nvkm_fifo *base, int chid)
-{
-       struct gk104_fifo *fifo = gk104_fifo(base);
-       struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-       struct nvkm_device *device = subdev->device;
-       const u32 stat = nvkm_rd32(device, 0x800004 + (chid * 0x08));
-       const u32 runl = (stat & 0x000f0000) >> 16;
-       const bool used = (stat & 0x00000001);
-       unsigned long engn, engm = fifo->runlist[runl].engm;
-       struct gk104_fifo_chan *chan;
-
-       assert_spin_locked(&fifo->base.lock);
-       if (!used)
-               return;
-
-       /* Lookup SW state for channel, and mark it as dead. */
-       chan = tu102_fifo_recover_chid(fifo, runl, chid);
-       if (chan) {
-               chan->killed = true;
-               nvkm_chan_error(&chan->base, false);
-       }
-
-       /* Block channel assignments from changing during recovery. */
-       tu102_fifo_recover_runl(fifo, runl);
-
-       /* Schedule recovery for any engines the channel is on. */
-       for_each_set_bit(engn, &engm, fifo->engine_nr) {
-               struct gk104_fifo_engine_status status;
-
-               gk104_fifo_engine_status(fifo, engn, &status);
-               if (!status.chan || status.chan->id != chid)
-                       continue;
-               tu102_fifo_recover_engn(fifo, engn);
-       }
-}
-
-static void
-tu102_fifo_recover_engn(struct gk104_fifo *fifo, int engn)
-{
-       struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-       struct nvkm_device *device = subdev->device;
-       const u32 runl = fifo->engine[engn].runl;
-       const u32 engm = BIT(engn);
-       struct gk104_fifo_engine_status status;
-
-       assert_spin_locked(&fifo->base.lock);
-       if (fifo->recover.engm & engm)
-               return;
-       fifo->recover.engm |= engm;
-
-       /* Block channel assignments from changing during recovery. */
-       tu102_fifo_recover_runl(fifo, runl);
-
-       /* Determine which channel (if any) is currently on the engine. */
-       gk104_fifo_engine_status(fifo, engn, &status);
-       if (status.chan) {
-               /* The channel is not longer viable, kill it. */
-               tu102_fifo_recover_chan(&fifo->base, status.chan->id);
-       }
-
-       /* Preempt the runlist */
-       nvkm_wr32(device, 0x2638, BIT(runl));
-
-       /* Schedule recovery. */
-       nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn);
-       schedule_work(&fifo->recover.work);
-}
-
 const struct nvkm_fifo_func_mmu_fault
 tu102_fifo_mmu_fault = {
        .recover = gf100_fifo_mmu_fault_recover,
@@ -282,22 +134,55 @@ tu102_fifo_mmu_fault = {
        .gpcclient = gv100_fifo_mmu_fault_gpcclient,
 };

-static void
-tu102_fifo_intr_ctxsw_timeout(struct gk104_fifo *fifo)
-{
-       struct nvkm_device *device = fifo->base.engine.subdev.device;
-       unsigned long flags, engm;
-       u32 engn;
-
-       spin_lock_irqsave(&fifo->base.lock, flags);
-
-       engm = nvkm_rd32(device, 0x2a30);
-       nvkm_wr32(device, 0x2a30, engm);
-
-       for_each_set_bit(engn, &engm, 32)
-               tu102_fifo_recover_engn(fifo, engn);
-
-       spin_unlock_irqrestore(&fifo->base.lock, flags);
-}
+void
+tu102_fifo_intr_ctxsw_timeout_info(struct nvkm_engn *engn, u32 info)
+{
+       struct nvkm_runl *runl = engn->runl;
+       struct nvkm_cgrp *cgrp;
+       unsigned long flags;
+
+       /* Check that engine hasn't become unstuck since timeout raised. */
+       ENGN_DEBUG(engn, "CTXSW_TIMEOUT %08x", info);
+       if (info & 0xc0000000)
+               return;
+
+       /* Determine channel group the engine is stuck on, and schedule recovery. */
+       switch (info & 0x0000c000) {
+       case 0x00004000: /* LOAD */
+               cgrp = nvkm_runl_cgrp_get_cgid(runl, info & 0x3fff0000, &flags);
+               break;
+       case 0x00008000: /* SAVE */
+       case 0x0000c000: /* SWITCH */
+               cgrp = nvkm_runl_cgrp_get_cgid(runl, info & 0x00003fff, &flags);
+               break;
+       default:
+               cgrp = NULL;
+               break;
+       }
+
+       if (!WARN_ON(!cgrp)) {
+               nvkm_runl_rc_cgrp(cgrp);
+               nvkm_cgrp_put(&cgrp, flags);
+       }
+}
+
+static void
+tu102_fifo_intr_ctxsw_timeout(struct nvkm_fifo *fifo)
+{
+       struct nvkm_device *device = fifo->engine.subdev.device;
+       struct nvkm_runl *runl;
+       struct nvkm_engn *engn;
+       u32 engm = nvkm_rd32(device, 0x002a30);
+       u32 info;
+
+       nvkm_runl_foreach(runl, fifo) {
+               nvkm_runl_foreach_engn_cond(engn, runl, engm & BIT(engn->id)) {
+                       info = nvkm_rd32(device, 0x003200 + (engn->id * 4));
+                       tu102_fifo_intr_ctxsw_timeout_info(engn, info);
+               }
+       }
+
+       nvkm_wr32(device, 0x002a30, engm);
+}

 static void
@@ -326,7 +211,7 @@ tu102_fifo_intr(struct nvkm_inth *inth)
        }

        if (stat & 0x00000002) {
-               tu102_fifo_intr_ctxsw_timeout(gk104_fifo(fifo));
+               tu102_fifo_intr_ctxsw_timeout(fifo);
                stat &= ~0x00000002;
        }
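
The masks in tu102_fifo_intr_ctxsw_timeout_info() above make the "hw-provided TSG info" bullet concrete. Decoding a hypothetical INFO value by hand:

        /* info = 0x00008007 (hypothetical):
         *   info & 0xc0000000 == 0          -> timeout still valid, recover
         *   info & 0x0000c000 == 0x00008000 -> SAVE: blame the outgoing group
         *   info & 0x00003fff == 0x0007     -> cgid 7, looked up with
         *                                      nvkm_runl_cgrp_get_cgid()
         * A non-zero status in the top bits means the engine became unstuck
         * after the timeout was raised, and recovery is skipped.
         */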
@@ -386,11 +271,9 @@ tu102_fifo = {
        .runl_ctor = gk104_fifo_runl_ctor,
        .init = gk104_fifo_init,
        .init_pbdmas = tu102_fifo_init_pbdmas,
-       .fini = gk104_fifo_fini,
        .intr = tu102_fifo_intr,
        .mmu_fault = &tu102_fifo_mmu_fault,
        .engine_id = gk104_fifo_engine_id,
-       .recover_chan = tu102_fifo_recover_chan,
        .runlist = &tu102_fifo_runlist,
        .nonstall = &gf100_fifo_nonstall,
        .runl = &tu102_runl,
@@ -410,7 +293,6 @@ tu102_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
        if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL)))
                return -ENOMEM;
        fifo->func = &tu102_fifo;
-       INIT_WORK(&fifo->recover.work, tu102_fifo_recover_work);
        *pfifo = &fifo->base;
        return nvkm_fifo_ctor(&tu102_fifo, device, type, inst, &fifo->base);
...