Commit fc360764 authored by Ben Skeggs
Browse files

drm/nouveau/gr/gf100-: virtualise tpc_mask + apply fixes from traces



We weren't placing higher TPC IDs in the right place on some configurations.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
parent aa5e38dc
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -1368,6 +1368,10 @@ gf100_grctx_generate_floorsweep(struct gf100_gr *gr)
		func->gpc_tpc_nr(gr);
	if (func->r419f78)
		func->r419f78(gr);
	if (func->tpc_mask)
		func->tpc_mask(gr);
	if (func->smid_config)
		func->smid_config(gr);
}

void
+6 −6
Original line number Diff line number Diff line
@@ -61,6 +61,8 @@ struct gf100_grctx_func {
	void (*r406500)(struct gf100_gr *);
	void (*gpc_tpc_nr)(struct gf100_gr *);
	void (*r419f78)(struct gf100_gr *);
	void (*tpc_mask)(struct gf100_gr *);
	void (*smid_config)(struct gf100_gr *);
};

extern const struct gf100_grctx_func gf100_grctx;
@@ -103,11 +105,6 @@ void gk104_grctx_generate_pagepool(struct gf100_grctx *);
void gk104_grctx_generate_patch_ltc(struct gf100_grctx *);
void gk104_grctx_generate_unkn(struct gf100_gr *);

void gm107_grctx_generate_bundle(struct gf100_grctx *);
void gm107_grctx_generate_pagepool(struct gf100_grctx *);
void gm107_grctx_generate_attrib(struct gf100_grctx *);
void gm107_grctx_generate_sm_id(struct gf100_gr *, int, int, int);

extern const struct gf100_grctx_func gk110_grctx;
extern const struct gf100_grctx_func gk110b_grctx;
extern const struct gf100_grctx_func gk208_grctx;
@@ -116,17 +113,20 @@ extern const struct gf100_grctx_func gm107_grctx;
void gm107_grctx_generate_bundle(struct gf100_grctx *);
void gm107_grctx_generate_pagepool(struct gf100_grctx *);
void gm107_grctx_generate_attrib(struct gf100_grctx *);
void gm107_grctx_generate_sm_id(struct gf100_gr *, int, int, int);

extern const struct gf100_grctx_func gm200_grctx;
void gm200_grctx_generate_dist_skip_table(struct gf100_gr *);
void gm200_grctx_generate_r406500(struct gf100_gr *);
void gm200_grctx_generate_405b60(struct gf100_gr *);
void gm200_grctx_generate_tpc_mask(struct gf100_gr *);
void gm200_grctx_generate_smid_config(struct gf100_gr *);

extern const struct gf100_grctx_func gm20b_grctx;

extern const struct gf100_grctx_func gp100_grctx;
void gp100_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *);
void gp100_grctx_generate_pagepool(struct gf100_grctx *);
void gp100_grctx_generate_smid_config(struct gf100_gr *);

extern const struct gf100_grctx_func gp102_grctx;
void gp102_grctx_generate_attrib(struct gf100_grctx *);
+13 −9
Original line number Diff line number Diff line
@@ -28,7 +28,7 @@
 ******************************************************************************/

void
gm200_grctx_generate_405b60(struct gf100_gr *gr)
gm200_grctx_generate_smid_config(struct gf100_gr *gr)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
@@ -59,6 +59,15 @@ gm200_grctx_generate_405b60(struct gf100_gr *gr)
		nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]);
}

/*
 * Build a per-GPC TPC bitmask and write it to register 0x4041c4.
 *
 * For each GPC index below gr->gpc_nr, a run of gr->tpc_nr[gpc] low bits is
 * set and shifted to bit offset (gpc * gr->func->tpc_nr); the per-GPC fields
 * are OR'd together into a single 32-bit value.
 */
void
gm200_grctx_generate_tpc_mask(struct gf100_gr *gr)
{
	u32 mask = 0;
	u32 gpc = 0;

	while (gpc < gr->gpc_nr) {
		/* Field width comes from the chipset function table, so the
		 * per-GPC stride is not hard-coded here. */
		mask |= ((1 << gr->tpc_nr[gpc]) - 1) << (gpc * gr->func->tpc_nr);
		gpc++;
	}

	nvkm_wr32(gr->base.engine.subdev.device, 0x4041c4, mask);
}

void
gm200_grctx_generate_r406500(struct gf100_gr *gr)
{
@@ -70,8 +79,7 @@ gm200_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	const struct gf100_grctx_func *grctx = gr->func->grctx;
	u32 idle_timeout, tmp;
	int i;
	u32 idle_timeout;

	gf100_gr_mmio(gr, gr->fuc_sw_ctx);

@@ -84,12 +92,6 @@ gm200_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)

	gf100_grctx_generate_floorsweep(gr);

	for (tmp = 0, i = 0; i < gr->gpc_nr; i++)
		tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4);
	nvkm_wr32(device, 0x4041c4, tmp);

	gm200_grctx_generate_405b60(gr);

	gf100_gr_icmd(gr, gr->fuc_bundle);
	nvkm_wr32(device, 0x404154, idle_timeout);
	gf100_gr_mthd(gr, gr->fuc_method);
@@ -140,4 +142,6 @@ gm200_grctx = {
	.dist_skip_table = gm200_grctx_generate_dist_skip_table,
	.r406500 = gm200_grctx_generate_r406500,
	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
	.tpc_mask = gm200_grctx_generate_tpc_mask,
	.smid_config = gm200_grctx_generate_smid_config,
};
+1 −1
Original line number Diff line number Diff line
@@ -52,7 +52,7 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
		tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4);
	nvkm_wr32(device, 0x4041c4, tmp);

	gm200_grctx_generate_405b60(gr);
	gm200_grctx_generate_smid_config(gr);

	gf100_gr_wait_idle(gr);

+8 −14
Original line number Diff line number Diff line
@@ -89,13 +89,12 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info)
	mmio_wr32(info, 0x41befc, 0x00000000);
}

static void
gp100_grctx_generate_405b60(struct gf100_gr *gr)
void
gp100_grctx_generate_smid_config(struct gf100_gr *gr)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
	u32 dist[TPC_MAX / 4] = {};
	u32 gpcs[GPC_MAX * 2] = {};
	u32 dist[TPC_MAX / 4] = {}, gpcs[16] = {};
	u8  tpcnr[GPC_MAX];
	int tpc, gpc, i;

@@ -112,12 +111,12 @@ gp100_grctx_generate_405b60(struct gf100_gr *gr)
		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;

		dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
		gpcs[gpc + (gr->gpc_nr * (tpc / 4))] |= i << (tpc * 8);
		gpcs[gpc + (gr->func->gpc_nr * (tpc / 4))] |= i << (tpc * 8);
	}

	for (i = 0; i < dist_nr; i++)
		nvkm_wr32(device, 0x405b60 + (i * 4), dist[i]);
	for (i = 0; i < gr->gpc_nr * 2; i++)
	for (i = 0; i < ARRAY_SIZE(gpcs); i++)
		nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]);
}

@@ -126,8 +125,7 @@ gp100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	const struct gf100_grctx_func *grctx = gr->func->grctx;
	u32 idle_timeout, tmp;
	int i;
	u32 idle_timeout;

	gf100_gr_mmio(gr, gr->fuc_sw_ctx);

@@ -140,12 +138,6 @@ gp100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)

	gf100_grctx_generate_floorsweep(gr);

	for (tmp = 0, i = 0; i < gr->gpc_nr; i++)
		tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 5);
	nvkm_wr32(device, 0x4041c4, tmp);

	gp100_grctx_generate_405b60(gr);

	gf100_gr_icmd(gr, gr->fuc_bundle);
	nvkm_wr32(device, 0x404154, idle_timeout);
	gf100_gr_mthd(gr, gr->fuc_method);
@@ -171,4 +163,6 @@ gp100_grctx = {
	.dist_skip_table = gm200_grctx_generate_dist_skip_table,
	.r406500 = gm200_grctx_generate_r406500,
	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
	.tpc_mask = gm200_grctx_generate_tpc_mask,
	.smid_config = gp100_grctx_generate_smid_config,
};
Loading