Commit 5f6474a4 authored by Ben Skeggs

drm/nouveau/gr/gf100-: port tile mapping calculations from NVGPU

There's also a couple of hardcoded tables for a couple of very specific
configurations that NVGPU's algorithm didn't work for.
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
parent 5c05a589
......@@ -1116,27 +1116,14 @@ gf100_grctx_generate_rop_mapping(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
u32 data[6] = {}, data2[2] = {};
u8 tpcnr[GPC_MAX];
u8 shift, ntpcv;
int gpc, tpc, i;
/* calculate first set of magics */
memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
gpc = -1;
for (tpc = 0; tpc < gr->tpc_total; tpc++) {
do {
gpc = (gpc + 1) % gr->gpc_nr;
} while (!tpcnr[gpc]);
tpcnr[gpc]--;
data[tpc / 6] |= gpc << ((tpc % 6) * 5);
}
int i;
for (; tpc < 32; tpc++)
data[tpc / 6] |= 7 << ((tpc % 6) * 5);
/* Pack tile map into register format. */
for (i = 0; i < 32; i++)
data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5);
/* and the second... */
/* Magic. */
shift = 0;
ntpcv = gr->tpc_total;
while (!(ntpcv & (1 << 4))) {
......
......@@ -194,27 +194,14 @@ gf117_grctx_generate_rop_mapping(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
u32 data[6] = {}, data2[2] = {};
u8 tpcnr[GPC_MAX];
u8 shift, ntpcv;
int gpc, tpc, i;
/* calculate first set of magics */
memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
gpc = -1;
for (tpc = 0; tpc < gr->tpc_total; tpc++) {
do {
gpc = (gpc + 1) % gr->gpc_nr;
} while (!tpcnr[gpc]);
tpcnr[gpc]--;
data[tpc / 6] |= gpc << ((tpc % 6) * 5);
}
int i;
for (; tpc < 32; tpc++)
data[tpc / 6] |= 7 << ((tpc % 6) * 5);
/* Pack tile map into register format. */
for (i = 0; i < 32; i++)
data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5);
/* and the second... */
/* Magic. */
shift = 0;
ntpcv = gr->tpc_total;
while (!(ntpcv & (1 << 4))) {
......
......@@ -1652,6 +1652,82 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
return ret;
}
/*
 * Compute the screen tile row offset and the tile->GPC map.
 *
 * Reads gr->gpc_nr, gr->tpc_nr[], gr->tpc_max and gr->tpc_total; writes
 * gr->screen_tile_row_offset and the first gr->tpc_total entries of
 * gr->tile[].  The mapping calculation is a port of the one in NVGPU.
 */
void
gf100_gr_oneinit_tiles(struct gf100_gr *gr)
{
	static const u8 primes[] = {
		3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61
	};
	int init_frac[GPC_MAX], init_err[GPC_MAX], run_err[GPC_MAX], i, j;
	/*
	 * Everything in the error-accumulation below must be signed: the
	 * running error legitimately goes negative, and comparing it against
	 * an unsigned denominator would convert it to a huge positive value,
	 * handing a GPC more tiles than it has TPCs.
	 */
	const int gpc_nr = gr->gpc_nr;
	const int tpc_max = gr->tpc_max;
	const int tpc_total = gr->tpc_total;
	int mul_factor, comm_denom;
	u8 gpc_map[GPC_MAX];
	bool sorted;

	/*
	 * Fixed row offsets for specific TPC totals; anything else uses the
	 * first listed prime that does not divide the TPC total (0x03 is kept
	 * if every listed prime divides it).
	 */
	switch (gr->tpc_total) {
	case 15: gr->screen_tile_row_offset = 0x06; break;
	case 14: gr->screen_tile_row_offset = 0x05; break;
	case 13: gr->screen_tile_row_offset = 0x02; break;
	case 11: gr->screen_tile_row_offset = 0x07; break;
	case 10: gr->screen_tile_row_offset = 0x06; break;
	case  7:
	case  5: gr->screen_tile_row_offset = 0x01; break;
	case  3: gr->screen_tile_row_offset = 0x02; break;
	case  2:
	case  1: gr->screen_tile_row_offset = 0x01; break;
	default: gr->screen_tile_row_offset = 0x03;
		for (i = 0; i < ARRAY_SIZE(primes); i++) {
			if (gr->tpc_total % primes[i]) {
				gr->screen_tile_row_offset = primes[i];
				break;
			}
		}
		break;
	}

	/* Sort GPCs by TPC count, highest-to-lowest (bubble sort). */
	for (i = 0; i < gpc_nr; i++)
		gpc_map[i] = i;
	sorted = false;

	while (!sorted) {
		for (sorted = true, i = 0; i < gpc_nr - 1; i++) {
			if (gr->tpc_nr[gpc_map[i + 1]] >
			    gr->tpc_nr[gpc_map[i + 0]]) {
				u8 swap = gpc_map[i];
				gpc_map[i + 0] = gpc_map[i + 1];
				gpc_map[i + 1] = swap;
				sorted = false;
			}
		}
	}

	/* Determine tile->GPC mapping */
	/* Scale by two when gpc_nr * tpc_max is odd so comm_denom/2 is exact. */
	mul_factor = gpc_nr * tpc_max;
	if (mul_factor & 1)
		mul_factor = 2;
	else
		mul_factor = 1;

	comm_denom = gpc_nr * tpc_max * mul_factor;

	/*
	 * Per sorted GPC: init_frac is the amount added to its running error
	 * on every pass, init_err a starting offset that grows with the
	 * GPC's position in the sorted order.
	 */
	for (i = 0; i < gpc_nr; i++) {
		init_frac[i] = gr->tpc_nr[gpc_map[i]] * gpc_nr * mul_factor;
		init_err[i] = i * tpc_max * mul_factor - comm_denom / 2;
		run_err[i] = init_frac[i] + init_err[i];
	}

	/*
	 * Repeatedly sweep the sorted GPCs; a GPC whose running error has
	 * reached half the common denominator takes the next tile.  The
	 * inner bound on i keeps writes within the first tpc_total entries
	 * of gr->tile[] even if a sweep would otherwise assign more.
	 */
	for (i = 0; i < tpc_total;) {
		for (j = 0; j < gpc_nr && i < tpc_total; j++) {
			if (run_err[j] * 2 >= comm_denom) {
				gr->tile[i++] = gpc_map[j];
				run_err[j] += init_frac[j] - comm_denom;
			} else {
				run_err[j] += init_frac[j];
			}
		}
	}
}
static int
gf100_gr_oneinit(struct nvkm_gr *base)
{
......@@ -1691,45 +1767,8 @@ gf100_gr_oneinit(struct nvkm_gr *base)
}
}
/*XXX: these need figuring out... though it might not even matter */
switch (device->chipset) {
case 0xc0:
if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */
gr->screen_tile_row_offset = 0x07;
} else
if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */
gr->screen_tile_row_offset = 0x05;
} else
if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */
gr->screen_tile_row_offset = 0x06;
}
break;
case 0xc3: /* 450, 4/0/0/0, 2 */
gr->screen_tile_row_offset = 0x03;
break;
case 0xc4: /* 460, 3/4/0/0, 4 */
gr->screen_tile_row_offset = 0x01;
break;
case 0xc1: /* 2/0/0/0, 1 */
gr->screen_tile_row_offset = 0x01;
break;
case 0xc8: /* 4/4/3/4, 5 */
gr->screen_tile_row_offset = 0x06;
break;
case 0xce: /* 4/4/0/0, 4 */
gr->screen_tile_row_offset = 0x03;
break;
case 0xcf: /* 4/0/0/0, 3 */
gr->screen_tile_row_offset = 0x03;
break;
case 0xd7:
case 0xd9: /* 1/0/0/0, 1 */
case 0xea: /* gk20a */
case 0x12b: /* gm20b */
gr->screen_tile_row_offset = 0x01;
break;
}
memset(gr->tile, 0xff, sizeof(gr->tile));
gr->func->oneinit_tiles(gr);
return 0;
}
......@@ -2164,6 +2203,7 @@ gf100_gr_gpccs_ucode = {
static const struct gf100_gr_func
gf100_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
......
......@@ -107,12 +107,13 @@ struct gf100_gr {
u8 ppc_tpc_nr[GPC_MAX][4];
u8 ppc_tpc_min;
u8 screen_tile_row_offset;
u8 tile[TPC_MAX];
struct gf100_gr_data mmio_data[4];
struct gf100_gr_mmio mmio_list[4096/8];
u32 size;
u32 *data;
u8 screen_tile_row_offset;
};
int gf100_gr_ctor(const struct gf100_gr_func *, struct nvkm_device *,
......@@ -123,6 +124,7 @@ void *gf100_gr_dtor(struct nvkm_gr *);
struct gf100_gr_func {
void (*dtor)(struct gf100_gr *);
void (*oneinit_tiles)(struct gf100_gr *);
int (*init)(struct gf100_gr *);
void (*init_gpc_mmu)(struct gf100_gr *);
void (*init_r405a14)(struct gf100_gr *);
......@@ -164,6 +166,7 @@ struct gf100_gr_func {
};
int gf100_gr_rops(struct gf100_gr *);
void gf100_gr_oneinit_tiles(struct gf100_gr *);
int gf100_gr_init(struct gf100_gr *);
void gf100_gr_init_vsc_stream_master(struct gf100_gr *);
void gf100_gr_init_zcull(struct gf100_gr *);
......@@ -191,6 +194,7 @@ void gm107_gr_init_400054(struct gf100_gr *);
int gk20a_gr_init(struct gf100_gr *);
void gm200_gr_oneinit_tiles(struct gf100_gr *);
int gm200_gr_rops(struct gf100_gr *);
void gm200_gr_init_num_active_ltcs(struct gf100_gr *);
void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *);
......
......@@ -114,6 +114,7 @@ gf104_gr_pack_mmio[] = {
static const struct gf100_gr_func
gf104_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
......
......@@ -111,6 +111,7 @@ gf108_gr_init_r405a14(struct gf100_gr *gr)
static const struct gf100_gr_func
gf108_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_r405a14 = gf108_gr_init_r405a14,
......
......@@ -86,6 +86,7 @@ gf110_gr_pack_mmio[] = {
static const struct gf100_gr_func
gf110_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
......
......@@ -150,6 +150,7 @@ gf117_gr_init_zcull(struct gf100_gr *gr)
static const struct gf100_gr_func
gf117_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
......
......@@ -177,6 +177,7 @@ gf119_gr_pack_mmio[] = {
static const struct gf100_gr_func
gf119_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
......
......@@ -448,6 +448,7 @@ gk104_gr_gpccs_ucode = {
static const struct gf100_gr_func
gk104_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
......@@ -350,6 +350,7 @@ gk110_gr_init_419eb4(struct gf100_gr *gr)
static const struct gf100_gr_func
gk110_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
......@@ -102,6 +102,7 @@ gk110b_gr_pack_mmio[] = {
static const struct gf100_gr_func
gk110b_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
......@@ -161,6 +161,7 @@ gk208_gr_gpccs_ucode = {
static const struct gf100_gr_func
gk208_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
......@@ -282,6 +282,7 @@ gk20a_gr_init(struct gf100_gr *gr)
static const struct gf100_gr_func
gk20a_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gk20a_gr_init,
.init_zcull = gf117_gr_init_zcull,
.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
......
......@@ -391,6 +391,7 @@ gm107_gr_gpccs_ucode = {
static const struct gf100_gr_func
gm107_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm107_gr_init_gpc_mmu,
.init_bios = gm107_gr_init_bios,
......
......@@ -77,6 +77,46 @@ gm200_gr_init_rop_active_fbps(struct gf100_gr *gr)
nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */
}
/*
 * Hardcoded tile->GPC maps, copied verbatim into gr->tile[] by
 * gm200_gr_oneinit_tiles() for specific GPC/TPC configurations.
 * They are only ever read, so keep them const.
 */

/* 6 GPCs, 24 TPCs in total. */
static const u8
gm200_gr_tile_map_6_24[] = {
	0, 1, 2, 3, 4, 5, 3, 4, 5, 0, 1, 2, 0, 1, 2, 3, 4, 5, 3, 4, 5, 0, 1, 2,
};

/* 4 GPCs, 16 TPCs in total. */
static const u8
gm200_gr_tile_map_4_16[] = {
	0, 1, 2, 3, 2, 3, 0, 1, 3, 0, 1, 2, 1, 2, 3, 0,
};

/* 2 GPCs, 8 TPCs in total. */
static const u8
gm200_gr_tile_map_2_8[] = {
	0, 1, 1, 0, 0, 1, 1, 0,
};
void
gm200_gr_oneinit_tiles(struct gf100_gr *gr)
{
/*XXX: Not sure what this is about. The algorithm from NVGPU
* seems to work for all boards I tried from earlier (and
* later) GPUs except in these specific configurations.
*
* Let's just hardcode them for now.
*/
if (gr->gpc_nr == 2 && gr->tpc_total == 8) {
memcpy(gr->tile, gm200_gr_tile_map_2_8, gr->tpc_total);
gr->screen_tile_row_offset = 1;
} else
if (gr->gpc_nr == 4 && gr->tpc_total == 16) {
memcpy(gr->tile, gm200_gr_tile_map_4_16, gr->tpc_total);
gr->screen_tile_row_offset = 4;
} else
if (gr->gpc_nr == 6 && gr->tpc_total == 24) {
memcpy(gr->tile, gm200_gr_tile_map_6_24, gr->tpc_total);
gr->screen_tile_row_offset = 5;
} else {
gf100_gr_oneinit_tiles(gr);
}
}
int
gm200_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
int index, struct nvkm_gr **pgr)
......@@ -117,6 +157,7 @@ gm200_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
static const struct gf100_gr_func
gm200_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_bios = gm107_gr_init_bios,
......
......@@ -64,6 +64,7 @@ gm20b_gr_set_hww_esr_report_mask(struct gf100_gr *gr)
static const struct gf100_gr_func
gm20b_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gk20a_gr_init,
.init_zcull = gf117_gr_init_zcull,
.init_gpc_mmu = gm20b_gr_init_gpc_mmu,
......
......@@ -64,6 +64,7 @@ gp100_gr_init_rop_active_fbps(struct gf100_gr *gr)
static const struct gf100_gr_func
gp100_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
......@@ -42,6 +42,7 @@ gp102_gr_init_swdx_pes_mask(struct gf100_gr *gr)
static const struct gf100_gr_func
gp102_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
......@@ -26,6 +26,7 @@
static const struct gf100_gr_func
gp104_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
......@@ -28,6 +28,7 @@
static const struct gf100_gr_func
gp107_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
......@@ -27,6 +27,7 @@
static const struct gf100_gr_func
gp10b_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment