Commit 068cae74 authored by Ben Skeggs's avatar Ben Skeggs

drm/nouveau/gr/gf100-: calculate and use sm mapping table

There's a number of places that require this data, so let's separate out
the calculations to ensure they remain consistent.

This is incorrect for GM200 and newer, but will produce the same results
as we did before.
Signed-off-by: default avatarBen Skeggs <bskeggs@redhat.com>
parent d00ffc0c
......@@ -1092,23 +1092,18 @@ gf100_grctx_generate_r4060a8(struct gf100_gr *gr)
struct nvkm_device *device = gr->base.engine.subdev.device;
const u8 gpcmax = nvkm_rd32(device, 0x022430);
const u8 tpcmax = nvkm_rd32(device, 0x022434) * gpcmax;
u8 tpcnr[GPC_MAX], data[TPC_MAX];
int gpc, tpc, i;
memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
memset(data, 0x1f, sizeof(data));
gpc = -1;
for (tpc = 0; tpc < gr->tpc_total; tpc++) {
do {
gpc = (gpc + 1) % gr->gpc_nr;
} while (!tpcnr[gpc]);
tpcnr[gpc]--;
data[tpc] = gpc;
}
int i, j, sm = 0;
u32 data;
for (i = 0; i < DIV_ROUND_UP(tpcmax, 4); i++)
nvkm_wr32(device, 0x4060a8 + (i * 4), ((u32 *)data)[i]);
for (i = 0; i < DIV_ROUND_UP(tpcmax, 4); i++) {
for (data = 0, j = 0; j < 4; j++) {
if (sm < gr->sm_nr)
data |= gr->sm[sm++].gpc << (j * 8);
else
data |= 0x1f << (j * 8);
}
nvkm_wr32(device, 0x4060a8 + (i * 4), data);
}
}
void
......@@ -1326,16 +1321,13 @@ gf100_grctx_generate_floorsweep(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
const struct gf100_grctx_func *func = gr->func->grctx;
int tpc, gpc, sm, i, j;
int gpc, sm, i, j;
u32 data;
for (tpc = 0, sm = 0; tpc < gr->tpc_max; tpc++) {
for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
if (tpc < gr->tpc_nr[gpc])
func->sm_id(gr, gpc, tpc, sm++);
if (func->tpc_nr)
func->tpc_nr(gr, gpc);
}
for (sm = 0; sm < gr->sm_nr; sm++) {
func->sm_id(gr, gr->sm[sm].gpc, gr->sm[sm].tpc, sm);
if (func->tpc_nr)
func->tpc_nr(gr, gr->sm[sm].gpc);
}
for (gpc = 0, i = 0; i < 4; i++) {
......
......@@ -49,23 +49,13 @@ gm200_grctx_generate_smid_config(struct gf100_gr *gr)
const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
u32 dist[TPC_MAX / 4] = {};
u32 gpcs[GPC_MAX] = {};
u8 tpcnr[GPC_MAX];
int tpc, gpc, i;
u8 sm, i;
memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
/* won't result in the same distribution as the binary driver where
* some of the gpcs have more tpcs than others, but this shall do
* for the moment. the code for earlier gpus has this issue too.
*/
for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
do {
gpc = (gpc + 1) % gr->gpc_nr;
} while(!tpcnr[gpc]);
tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
gpcs[gpc] |= i << (tpc * 8);
for (sm = 0; sm < gr->sm_nr; sm++) {
const u8 gpc = gr->sm[sm].gpc;
const u8 tpc = gr->sm[sm].tpc;
dist[sm / 4] |= ((gpc << 4) | tpc) << ((sm % 4) * 8);
gpcs[gpc] |= sm << (tpc * 8);
}
for (i = 0; i < dist_nr; i++)
......
......@@ -95,23 +95,13 @@ gp100_grctx_generate_smid_config(struct gf100_gr *gr)
struct nvkm_device *device = gr->base.engine.subdev.device;
const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
u32 dist[TPC_MAX / 4] = {}, gpcs[16] = {};
u8 tpcnr[GPC_MAX];
int tpc, gpc, i;
u8 sm, i;
memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
/* won't result in the same distribution as the binary driver where
* some of the gpcs have more tpcs than others, but this shall do
* for the moment. the code for earlier gpus has this issue too.
*/
for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
do {
gpc = (gpc + 1) % gr->gpc_nr;
} while(!tpcnr[gpc]);
tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
gpcs[gpc + (gr->func->gpc_nr * (tpc / 4))] |= i << (tpc * 8);
for (sm = 0; sm < gr->sm_nr; sm++) {
const u8 gpc = gr->sm[sm].gpc;
const u8 tpc = gr->sm[sm].tpc;
dist[sm / 4] |= ((gpc << 4) | tpc) << ((sm % 4) * 8);
gpcs[gpc + (gr->func->gpc_nr * (tpc / 4))] |= sm << ((tpc % 4) * 8);
}
for (i = 0; i < dist_nr; i++)
......
......@@ -1652,6 +1652,21 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
return ret;
}
void
gf100_gr_oneinit_sm_id(struct gf100_gr *gr)
{
int tpc, gpc;
for (tpc = 0; tpc < gr->tpc_max; tpc++) {
for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
if (tpc < gr->tpc_nr[gpc]) {
gr->sm[gr->sm_nr].gpc = gpc;
gr->sm[gr->sm_nr].tpc = tpc;
gr->sm_nr++;
}
}
}
}
void
gf100_gr_oneinit_tiles(struct gf100_gr *gr)
{
......@@ -1769,6 +1784,7 @@ gf100_gr_oneinit(struct nvkm_gr *base)
memset(gr->tile, 0xff, sizeof(gr->tile));
gr->func->oneinit_tiles(gr);
gr->func->oneinit_sm_id(gr);
return 0;
}
......@@ -2204,6 +2220,7 @@ gf100_gr_gpccs_ucode = {
static const struct gf100_gr_func
gf100_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
......
......@@ -110,6 +110,12 @@ struct gf100_gr {
u8 screen_tile_row_offset;
u8 tile[TPC_MAX];
struct {
u8 gpc;
u8 tpc;
} sm[TPC_MAX];
u8 sm_nr;
struct gf100_gr_data mmio_data[4];
struct gf100_gr_mmio mmio_list[4096/8];
u32 size;
......@@ -125,6 +131,7 @@ void *gf100_gr_dtor(struct nvkm_gr *);
struct gf100_gr_func {
void (*dtor)(struct gf100_gr *);
void (*oneinit_tiles)(struct gf100_gr *);
void (*oneinit_sm_id)(struct gf100_gr *);
int (*init)(struct gf100_gr *);
void (*init_gpc_mmu)(struct gf100_gr *);
void (*init_r405a14)(struct gf100_gr *);
......@@ -167,6 +174,7 @@ struct gf100_gr_func {
int gf100_gr_rops(struct gf100_gr *);
void gf100_gr_oneinit_tiles(struct gf100_gr *);
void gf100_gr_oneinit_sm_id(struct gf100_gr *);
int gf100_gr_init(struct gf100_gr *);
void gf100_gr_init_vsc_stream_master(struct gf100_gr *);
void gf100_gr_init_zcull(struct gf100_gr *);
......@@ -195,6 +203,7 @@ void gm107_gr_init_400054(struct gf100_gr *);
int gk20a_gr_init(struct gf100_gr *);
void gm200_gr_oneinit_tiles(struct gf100_gr *);
void gm200_gr_oneinit_sm_id(struct gf100_gr *);
int gm200_gr_rops(struct gf100_gr *);
void gm200_gr_init_num_active_ltcs(struct gf100_gr *);
void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *);
......
......@@ -115,6 +115,7 @@ gf104_gr_pack_mmio[] = {
static const struct gf100_gr_func
gf104_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
......
......@@ -112,6 +112,7 @@ gf108_gr_init_r405a14(struct gf100_gr *gr)
static const struct gf100_gr_func
gf108_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_r405a14 = gf108_gr_init_r405a14,
......
......@@ -87,6 +87,7 @@ gf110_gr_pack_mmio[] = {
static const struct gf100_gr_func
gf110_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
......
......@@ -151,6 +151,7 @@ gf117_gr_init_zcull(struct gf100_gr *gr)
static const struct gf100_gr_func
gf117_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
......
......@@ -178,6 +178,7 @@ gf119_gr_pack_mmio[] = {
static const struct gf100_gr_func
gf119_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
......
......@@ -449,6 +449,7 @@ gk104_gr_gpccs_ucode = {
static const struct gf100_gr_func
gk104_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
......@@ -351,6 +351,7 @@ gk110_gr_init_419eb4(struct gf100_gr *gr)
static const struct gf100_gr_func
gk110_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
......@@ -103,6 +103,7 @@ gk110b_gr_pack_mmio[] = {
static const struct gf100_gr_func
gk110b_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
......@@ -162,6 +162,7 @@ gk208_gr_gpccs_ucode = {
static const struct gf100_gr_func
gk208_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
......@@ -283,6 +283,7 @@ gk20a_gr_init(struct gf100_gr *gr)
static const struct gf100_gr_func
gk20a_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gk20a_gr_init,
.init_zcull = gf117_gr_init_zcull,
.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
......
......@@ -392,6 +392,7 @@ gm107_gr_gpccs_ucode = {
static const struct gf100_gr_func
gm107_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gm107_gr_init_gpc_mmu,
.init_bios = gm107_gr_init_bios,
......
......@@ -92,6 +92,13 @@ gm200_gr_tile_map_2_8[] = {
0, 1, 1, 0, 0, 1, 1, 0,
};
void
gm200_gr_oneinit_sm_id(struct gf100_gr *gr)
{
/*XXX: There's a different algorithm here I've not yet figured out. */
gf100_gr_oneinit_sm_id(gr);
}
void
gm200_gr_oneinit_tiles(struct gf100_gr *gr)
{
......@@ -158,6 +165,7 @@ gm200_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
static const struct gf100_gr_func
gm200_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
.oneinit_sm_id = gm200_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_bios = gm107_gr_init_bios,
......
......@@ -65,6 +65,7 @@ gm20b_gr_set_hww_esr_report_mask(struct gf100_gr *gr)
static const struct gf100_gr_func
gm20b_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
.oneinit_sm_id = gm200_gr_oneinit_sm_id,
.init = gk20a_gr_init,
.init_zcull = gf117_gr_init_zcull,
.init_gpc_mmu = gm20b_gr_init_gpc_mmu,
......
......@@ -65,6 +65,7 @@ gp100_gr_init_rop_active_fbps(struct gf100_gr *gr)
static const struct gf100_gr_func
gp100_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
.oneinit_sm_id = gm200_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
......@@ -43,6 +43,7 @@ gp102_gr_init_swdx_pes_mask(struct gf100_gr *gr)
static const struct gf100_gr_func
gp102_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
.oneinit_sm_id = gm200_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
......@@ -27,6 +27,7 @@
static const struct gf100_gr_func
gp104_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
.oneinit_sm_id = gm200_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
......@@ -29,6 +29,7 @@
static const struct gf100_gr_func
gp107_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
.oneinit_sm_id = gm200_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
......@@ -28,6 +28,7 @@
static const struct gf100_gr_func
gp10b_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
.oneinit_sm_id = gm200_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment