nouveau_object.c
/*
 * Copyright (C) 2006 Ben Skeggs.
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/*
 * Authors:
 *   Ben Skeggs <darktama@iinet.net.au>
 */

#include "drmP.h"
#include "drm.h"
#include "nouveau_drv.h"
#include "nouveau_drm.h"
#include "nouveau_ramht.h"

/* NVidia uses context objects to drive drawing operations.

   Context objects can be selected into 8 subchannels in the FIFO,
   and then used via DMA command buffers.

   A context object is referenced by a user defined handle (CARD32). The HW
   looks up graphics objects in a hash table in the instance RAM.

   An entry in the hash table consists of 2 CARD32s. The first CARD32 contains
   the handle, the second one a bitfield that encodes the address of the
   object in instance RAM.

   The format of the second CARD32 seems to be:

   NV4 to NV30:

   15: 0  instance_addr >> 4
   17:16  engine (here uses 1 = graphics)
   28:24  channel id (here uses 0)
   31	  valid (use 1)

   NV40:

   15: 0  instance_addr >> 4   (maybe 19-0)
   21:20  engine (here uses 1 = graphics)
   I'm unsure about the other bits, but using 0 seems to work.

   The key into the hash table depends on the object handle and channel id;
   the hashing and entry packing are handled by the RAMHT code in
   nouveau_ramht.c.
*/
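
/*
 * Illustrative sketch only, not used by the driver: assuming the pre-NV50
 * hash-table layout documented above, the context half of an entry would be
 * packed roughly as follows.  The helper name is made up for this example;
 * the real hashing and entry packing live in nouveau_ramht.c.
 */
static inline uint32_t
example_nv04_ramht_ctx(uint32_t inst_addr, int chid)
{
	uint32_t ctx;

	ctx  = (inst_addr >> 4) & 0xffff;	/* 15: 0  instance_addr >> 4 */
	ctx |= 1 << 16;				/* 17:16  engine, 1 = graphics */
	ctx |= (chid & 0x1f) << 24;		/* 28:24  channel id */
	ctx |= 1U << 31;			/*    31  valid */
	return ctx;
}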

int
nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
		   uint32_t size, int align, uint32_t flags,
		   struct nouveau_gpuobj **gpuobj_ret)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_engine *engine = &dev_priv->engine;
	struct nouveau_gpuobj *gpuobj;
	struct drm_mm_node *ramin = NULL;
	int ret;

	NV_DEBUG(dev, "ch%d size=%u align=%d flags=0x%08x\n",
		 chan ? chan->id : -1, size, align, flags);

	if (!dev_priv || !gpuobj_ret || *gpuobj_ret != NULL)
		return -EINVAL;

	gpuobj = kzalloc(sizeof(*gpuobj), GFP_KERNEL);
	if (!gpuobj)
		return -ENOMEM;
	NV_DEBUG(dev, "gpuobj %p\n", gpuobj);
	gpuobj->dev = dev;
	gpuobj->flags = flags;
	kref_init(&gpuobj->refcount);
	gpuobj->size = size;

	spin_lock(&dev_priv->ramin_lock);
	list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list);
	spin_unlock(&dev_priv->ramin_lock);

	if (chan) {
		NV_DEBUG(dev, "channel heap\n");

		ramin = drm_mm_search_free(&chan->ramin_heap, size, align, 0);
		if (ramin)
			ramin = drm_mm_get_block(ramin, size, align);

		if (!ramin) {
			nouveau_gpuobj_ref(NULL, &gpuobj);
			return -ENOMEM;
		}
	} else {
		NV_DEBUG(dev, "global heap\n");

		/* allocate backing pages, sets vinst */
		ret = engine->instmem.populate(dev, gpuobj, &size);
		if (ret) {
			nouveau_gpuobj_ref(NULL, &gpuobj);
			return ret;
		}

		/* try and get aperture space */
		do {
			if (drm_mm_pre_get(&dev_priv->ramin_heap))
				return -ENOMEM;

			spin_lock(&dev_priv->ramin_lock);
			ramin = drm_mm_search_free(&dev_priv->ramin_heap, size,
						   align, 0);
			if (ramin == NULL) {
				spin_unlock(&dev_priv->ramin_lock);
				nouveau_gpuobj_ref(NULL, &gpuobj);
				return -ENOMEM;
			}

			ramin = drm_mm_get_block_atomic(ramin, size, align);
			spin_unlock(&dev_priv->ramin_lock);
		} while (ramin == NULL);

		/* on nv50 it's ok to fail, we have a fallback path */
		if (!ramin && dev_priv->card_type < NV_50) {
			nouveau_gpuobj_ref(NULL, &gpuobj);
			return -ENOMEM;
		}
	}

	/* if we got a chunk of the aperture, map pages into it */
	gpuobj->im_pramin = ramin;
	if (!chan && gpuobj->im_pramin && dev_priv->ramin_available) {
		ret = engine->instmem.bind(dev, gpuobj);
		if (ret) {
			nouveau_gpuobj_ref(NULL, &gpuobj);
			return ret;
		}
	}

	/* calculate the various different addresses for the object */
	if (chan) {
		gpuobj->pinst = chan->ramin->pinst;
		if (gpuobj->pinst != ~0)
			gpuobj->pinst += gpuobj->im_pramin->start;

		if (dev_priv->card_type < NV_50) {
			gpuobj->cinst = gpuobj->pinst;
		} else {
			gpuobj->cinst = gpuobj->im_pramin->start;
			gpuobj->vinst = gpuobj->im_pramin->start +
					chan->ramin->vinst;
		}
	} else {
		if (gpuobj->im_pramin)
			gpuobj->pinst = gpuobj->im_pramin->start;
		else
			gpuobj->pinst = ~0;
		gpuobj->cinst = 0xdeadbeef;
	}

	if (gpuobj->flags & NVOBJ_FLAG_ZERO_ALLOC) {
		int i;

		for (i = 0; i < gpuobj->size; i += 4)
			nv_wo32(gpuobj, i, 0);
		engine->instmem.flush(dev);
	}

	*gpuobj_ret = gpuobj;
	return 0;
}

int
nouveau_gpuobj_init(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;

	NV_DEBUG(dev, "\n");

	INIT_LIST_HEAD(&dev_priv->gpuobj_list);
	spin_lock_init(&dev_priv->ramin_lock);
	dev_priv->ramin_base = ~0;

	return 0;
}

void
nouveau_gpuobj_takedown(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;

	NV_DEBUG(dev, "\n");

	BUG_ON(!list_empty(&dev_priv->gpuobj_list));
}

static void
nouveau_gpuobj_del(struct kref *ref)
{
	struct nouveau_gpuobj *gpuobj =
		container_of(ref, struct nouveau_gpuobj, refcount);
	struct drm_device *dev = gpuobj->dev;
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_engine *engine = &dev_priv->engine;
	int i;

	NV_DEBUG(dev, "gpuobj %p\n", gpuobj);

	if (gpuobj->im_pramin && (gpuobj->flags & NVOBJ_FLAG_ZERO_FREE)) {
		for (i = 0; i < gpuobj->size; i += 4)
			nv_wo32(gpuobj, i, 0);
		engine->instmem.flush(dev);
	}

	if (gpuobj->dtor)
		gpuobj->dtor(dev, gpuobj);

	if (gpuobj->im_backing)
		engine->instmem.clear(dev, gpuobj);

	spin_lock(&dev_priv->ramin_lock);
	if (gpuobj->im_pramin)
		drm_mm_put_block(gpuobj->im_pramin);
	list_del(&gpuobj->list);
	spin_unlock(&dev_priv->ramin_lock);

	kfree(gpuobj);
}

void
nouveau_gpuobj_ref(struct nouveau_gpuobj *ref, struct nouveau_gpuobj **ptr)
{
	if (ref)
		kref_get(&ref->refcount);

	if (*ptr)
		kref_put(&(*ptr)->refcount, nouveau_gpuobj_del);

	*ptr = ref;
}
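
/*
 * Usage sketch (illustrative only): nouveau_gpuobj_ref() is the single entry
 * point for both taking and dropping references.  Passing an object bumps its
 * refcount and stores it in *ptr (releasing whatever *ptr previously held);
 * passing NULL just drops the current reference and clears *ptr.
 */
static inline void
example_gpuobj_ref_cycle(struct nouveau_gpuobj *obj)
{
	struct nouveau_gpuobj *tmp = NULL;

	nouveau_gpuobj_ref(obj, &tmp);	/* tmp now holds its own reference */
	nouveau_gpuobj_ref(NULL, &tmp);	/* reference dropped, tmp == NULL */
}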

int
nouveau_gpuobj_new_fake(struct drm_device *dev, u32 pinst, u64 vinst,
			u32 size, u32 flags, struct nouveau_gpuobj **pgpuobj)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_gpuobj *gpuobj = NULL;
	int i;

	NV_DEBUG(dev,
		 "pinst=0x%08x vinst=0x%010llx size=0x%08x flags=0x%08x\n",
		 pinst, vinst, size, flags);

	gpuobj = kzalloc(sizeof(*gpuobj), GFP_KERNEL);
	if (!gpuobj)
		return -ENOMEM;
	NV_DEBUG(dev, "gpuobj %p\n", gpuobj);
	gpuobj->dev = dev;
	gpuobj->flags = flags;
	kref_init(&gpuobj->refcount);
	gpuobj->size  = size;
	gpuobj->pinst = pinst;
	gpuobj->cinst = 0xdeadbeef;
	gpuobj->vinst = vinst;

	if (gpuobj->flags & NVOBJ_FLAG_ZERO_ALLOC) {
		for (i = 0; i < gpuobj->size; i += 4)
			nv_wo32(gpuobj, i, 0);
		dev_priv->engine.instmem.flush(dev);
	}

	spin_lock(&dev_priv->ramin_lock);
	list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list);
	spin_unlock(&dev_priv->ramin_lock);
	*pgpuobj = gpuobj;
	return 0;
}


static uint32_t
nouveau_gpuobj_class_instmem_size(struct drm_device *dev, int class)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;

	/*XXX: dodgy hack for now */
	if (dev_priv->card_type >= NV_50)
		return 24;
	if (dev_priv->card_type >= NV_40)
		return 32;
	return 16;
}

/*
   DMA objects are used to reference a piece of memory in the
   framebuffer, PCI or AGP address space. Each object is 16 bytes big
   and looks as follows:

   entry[0]
   11:0  class (seems like I can always use 0 here)
   12    page table present?
   13    page entry linear?
   15:14 access: 0 rw, 1 ro, 2 wo
   17:16 target: 0 NV memory, 1 NV memory tiled, 2 PCI, 3 AGP
   31:20 dma adjust (bits 0-11 of the address)
   entry[1]
   dma limit (size of transfer)
   entry[X]
   1     0 readonly, 1 readwrite
   31:12 dma frame address of the page (bits 12-31 of the address)
   entry[N]
   page table terminator, same value as the first pte, which is what nvidia
   does; rivatv uses 0xffffffff

   Non linear page tables need a list of frame addresses afterwards,
   the rivatv project has some info on this.

   The method below creates a DMA object in instance RAM and returns a handle
   to it that can be used to set up context objects.
*/
int
nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class,
		       uint64_t offset, uint64_t size, int access,
		       int target, struct nouveau_gpuobj **gpuobj)
{
	struct drm_device *dev = chan->dev;
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_instmem_engine *instmem = &dev_priv->engine.instmem;
	int ret;

	NV_DEBUG(dev, "ch%d class=0x%04x offset=0x%llx size=0x%llx\n",
		 chan->id, class, offset, size);
	NV_DEBUG(dev, "access=%d target=%d\n", access, target);

	switch (target) {
	case NV_DMA_TARGET_AGP:
		offset += dev_priv->gart_info.aper_base;
		break;
	default:
		break;
	}

	ret = nouveau_gpuobj_new(dev, chan,
				 nouveau_gpuobj_class_instmem_size(dev, class),
				 16, NVOBJ_FLAG_ZERO_ALLOC |
				 NVOBJ_FLAG_ZERO_FREE, gpuobj);
	if (ret) {
		NV_ERROR(dev, "Error creating gpuobj: %d\n", ret);
		return ret;
	}

	if (dev_priv->card_type < NV_50) {
		uint32_t frame, adjust, pte_flags = 0;

		if (access != NV_DMA_ACCESS_RO)
			pte_flags |= (1<<1);
		adjust = offset &  0x00000fff;
		frame  = offset & ~0x00000fff;

		nv_wo32(*gpuobj,  0, ((1<<12) | (1<<13) | (adjust << 20) |
				      (access << 14) | (target << 16) |
				      class));
		nv_wo32(*gpuobj,  4, size - 1);
		nv_wo32(*gpuobj,  8, frame | pte_flags);
		nv_wo32(*gpuobj, 12, frame | pte_flags);
	} else {
		uint64_t limit = offset + size - 1;
		uint32_t flags0, flags5;

		if (target == NV_DMA_TARGET_VIDMEM) {
			flags0 = 0x00190000;
			flags5 = 0x00010000;
		} else {
			flags0 = 0x7fc00000;
			flags5 = 0x00080000;
		}

		nv_wo32(*gpuobj,  0, flags0 | class);
		nv_wo32(*gpuobj,  4, lower_32_bits(limit));
		nv_wo32(*gpuobj,  8, lower_32_bits(offset));
		nv_wo32(*gpuobj, 12, ((upper_32_bits(limit) & 0xff) << 24) |
				      (upper_32_bits(offset) & 0xff));
		nv_wo32(*gpuobj, 20, flags5);
	}

	instmem->flush(dev);

	(*gpuobj)->engine = NVOBJ_ENGINE_SW;
	(*gpuobj)->class  = class;
	return 0;
}
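
/*
 * Caller-side sketch (illustrative only): this mirrors what
 * nouveau_gpuobj_channel_init() below does for the pre-NV50 VRAM ctxdma.
 * The handle value is made up for this example; a real caller passes the
 * handle that userspace will use to reference the object.
 */
static inline int
example_bind_vram_ctxdma(struct nouveau_channel *chan, uint64_t fb_size)
{
	struct nouveau_gpuobj *vram = NULL;
	int ret;

	ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY, 0, fb_size,
				     NV_DMA_ACCESS_RW, NV_DMA_TARGET_VIDMEM,
				     &vram);
	if (ret)
		return ret;

	/* drop the local reference once inserted, as channel init does */
	ret = nouveau_ramht_insert(chan, 0xbeef0201, vram);
	nouveau_gpuobj_ref(NULL, &vram);
	return ret;
}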

int
nouveau_gpuobj_gart_dma_new(struct nouveau_channel *chan,
			    uint64_t offset, uint64_t size, int access,
			    struct nouveau_gpuobj **gpuobj,
			    uint32_t *o_ret)
{
	struct drm_device *dev = chan->dev;
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	int ret;

	if (dev_priv->gart_info.type == NOUVEAU_GART_AGP ||
	    (dev_priv->card_type >= NV_50 &&
	     dev_priv->gart_info.type == NOUVEAU_GART_SGDMA)) {
		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
					     offset + dev_priv->vm_gart_base,
					     size, access, NV_DMA_TARGET_AGP,
					     gpuobj);
		if (o_ret)
			*o_ret = 0;
	} else
	if (dev_priv->gart_info.type == NOUVEAU_GART_SGDMA) {
		nouveau_gpuobj_ref(dev_priv->gart_info.sg_ctxdma, gpuobj);
		if (offset & ~0xffffffffULL) {
			NV_ERROR(dev, "obj offset exceeds 32-bits\n");
			return -EINVAL;
		}
		if (o_ret)
			*o_ret = (uint32_t)offset;
		ret = (*gpuobj != NULL) ? 0 : -EINVAL;
	} else {
		NV_ERROR(dev, "Invalid GART type %d\n", dev_priv->gart_info.type);
		return -EINVAL;
	}

	return ret;
}

/* Context objects in the instance RAM have the following structure.
 * On NV40 they are 32 bytes long, on NV30 and earlier 16 bytes.

   NV4 - NV30:

   entry[0]
   11:0 class
   12   chroma key enable
   13   user clip enable
   14   swizzle enable
   17:15 patch config:
       scrcopy_and, rop_and, blend_and, scrcopy, srccopy_pre, blend_pre
   18   synchronize enable
   19   endian: 1 big, 0 little
   21:20 dither mode
   23    single step enable
   24    patch status: 0 invalid, 1 valid
   25    context_surface 0: 1 valid
   26    context surface 1: 1 valid
   27    context pattern: 1 valid
   28    context rop: 1 valid
   29,30 context beta, beta4
   entry[1]
   7:0   mono format
   15:8  color format
   31:16 notify instance address
   entry[2]
   15:0  dma 0 instance address
   31:16 dma 1 instance address
   entry[3]
   dma method traps

   NV40:
   No idea what the exact format is. Here's what can be deduced:

   entry[0]:
   11:0  class  (maybe uses more bits here?)
   17    user clip enable
   21:19 patch config
   25    patch status valid ?
   entry[1]:
   15:0  DMA notifier  (maybe 20:0)
   entry[2]:
   15:0  DMA 0 instance (maybe 20:0)
   24    big endian
   entry[3]:
   15:0  DMA 1 instance (maybe 20:0)
   entry[4]:
   entry[5]:
   set to 0?
*/
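
/*
 * Illustrative sketch only, not used by the driver: packing entry[0] of an
 * NV4-NV30 context object according to the bitfield documented above.  Only
 * a few of the fields are shown and the values are arbitrary; the helper
 * name is made up for this example.
 */
static inline uint32_t
example_nv04_grobj_entry0(int class, bool big_endian)
{
	uint32_t e0 = class & 0xfff;	/* 11:0  class */

	e0 |= 1 << 18;			/* 18    synchronize enable */
	if (big_endian)
		e0 |= 1 << 19;		/* 19    endian: 1 big, 0 little */
	e0 |= 1 << 24;			/* 24    patch status: 1 valid */
	return e0;
}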
int
nouveau_gpuobj_gr_new(struct nouveau_channel *chan, int class,
		      struct nouveau_gpuobj **gpuobj)
{
	struct drm_device *dev = chan->dev;
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	int ret;

	NV_DEBUG(dev, "ch%d class=0x%04x\n", chan->id, class);

	ret = nouveau_gpuobj_new(dev, chan,
				 nouveau_gpuobj_class_instmem_size(dev, class),
				 16,
				 NVOBJ_FLAG_ZERO_ALLOC | NVOBJ_FLAG_ZERO_FREE,
				 gpuobj);
	if (ret) {
		NV_ERROR(dev, "Error creating gpuobj: %d\n", ret);
		return ret;
	}

	if (dev_priv->card_type >= NV_50) {
		nv_wo32(*gpuobj,  0, class);
		nv_wo32(*gpuobj, 20, 0x00010000);
	} else {
		switch (class) {
		case NV_CLASS_NULL:
			nv_wo32(*gpuobj, 0, 0x00001030);
			nv_wo32(*gpuobj, 4, 0xFFFFFFFF);
			break;
		default:
			if (dev_priv->card_type >= NV_40) {
				nv_wo32(*gpuobj, 0, class);
#ifdef __BIG_ENDIAN
				nv_wo32(*gpuobj, 8, 0x01000000);
#endif
			} else {
#ifdef __BIG_ENDIAN
				nv_wo32(*gpuobj, 0, class | 0x00080000);
#else
				nv_wo32(*gpuobj, 0, class);
#endif
			}
		}
	}
	dev_priv->engine.instmem.flush(dev);

	(*gpuobj)->engine = NVOBJ_ENGINE_GR;
	(*gpuobj)->class  = class;
	return 0;
}

int
nouveau_gpuobj_sw_new(struct nouveau_channel *chan, int class,
		      struct nouveau_gpuobj **gpuobj_ret)
{
	struct drm_nouveau_private *dev_priv;
	struct nouveau_gpuobj *gpuobj;

	if (!chan || !gpuobj_ret || *gpuobj_ret != NULL)
		return -EINVAL;
	dev_priv = chan->dev->dev_private;

	gpuobj = kzalloc(sizeof(*gpuobj), GFP_KERNEL);
	if (!gpuobj)
		return -ENOMEM;
	gpuobj->dev = chan->dev;
	gpuobj->engine = NVOBJ_ENGINE_SW;
	gpuobj->class = class;
	kref_init(&gpuobj->refcount);
	gpuobj->cinst = 0x40;

	spin_lock(&dev_priv->ramin_lock);
	list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list);
	spin_unlock(&dev_priv->ramin_lock);
	*gpuobj_ret = gpuobj;
	return 0;
}

static int
nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan)
{
	struct drm_device *dev = chan->dev;
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	uint32_t size;
	uint32_t base;
	int ret;

	NV_DEBUG(dev, "ch%d\n", chan->id);

	/* Base amount for object storage (4KiB enough?) */
	size = 0x1000;
	base = 0;

	/* PGRAPH context */
	size += dev_priv->engine.graph.grctx_size;

	if (dev_priv->card_type == NV_50) {
		/* Various fixed table thingos */
		size += 0x1400; /* mostly unknown stuff */
		size += 0x4000; /* vm pd */
		base  = 0x6000;
		/* RAMHT, not sure about setting size yet, 32KiB to be safe */
		size += 0x8000;
		/* RAMFC */
		size += 0x1000;
	}

	ret = nouveau_gpuobj_new(dev, NULL, size, 0x1000, 0, &chan->ramin);
	if (ret) {
		NV_ERROR(dev, "Error allocating channel PRAMIN: %d\n", ret);
		return ret;
	}

	ret = drm_mm_init(&chan->ramin_heap, base, size);
	if (ret) {
		NV_ERROR(dev, "Error creating PRAMIN heap: %d\n", ret);
		nouveau_gpuobj_ref(NULL, &chan->ramin);
		return ret;
	}

	return 0;
}

int
nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
			    uint32_t vram_h, uint32_t tt_h)
{
	struct drm_device *dev = chan->dev;
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_instmem_engine *instmem = &dev_priv->engine.instmem;
	struct nouveau_gpuobj *vram = NULL, *tt = NULL;
	int ret, i;

	NV_DEBUG(dev, "ch%d vram=0x%08x tt=0x%08x\n", chan->id, vram_h, tt_h);

	/* Allocate a chunk of memory for per-channel object storage */
	ret = nouveau_gpuobj_channel_init_pramin(chan);
	if (ret) {
		NV_ERROR(dev, "init pramin\n");
		return ret;
	}

	/* NV50 VM
	 *  - Allocate per-channel page-directory
	 *  - Map GART and VRAM into the channel's address space at the
	 *    locations determined during init.
	 */
	if (dev_priv->card_type >= NV_50) {
		u32 pgd_offs = (dev_priv->chipset == 0x50) ? 0x1400 : 0x0200;
		u64 vm_vinst = chan->ramin->vinst + pgd_offs;
		u32 vm_pinst = chan->ramin->pinst;
		u32 pde;

		if (vm_pinst != ~0)
			vm_pinst += pgd_offs;

		ret = nouveau_gpuobj_new_fake(dev, vm_pinst, vm_vinst, 0x4000,
					      0, &chan->vm_pd);
		if (ret)
			return ret;
		for (i = 0; i < 0x4000; i += 8) {
			nv_wo32(chan->vm_pd, i + 0, 0x00000000);
			nv_wo32(chan->vm_pd, i + 4, 0xdeadcafe);
		}

		nouveau_gpuobj_ref(dev_priv->gart_info.sg_ctxdma,
				   &chan->vm_gart_pt);
		pde = (dev_priv->vm_gart_base / (512*1024*1024)) * 8;
		nv_wo32(chan->vm_pd, pde + 0, chan->vm_gart_pt->vinst | 3);
		nv_wo32(chan->vm_pd, pde + 4, 0x00000000);

		pde = (dev_priv->vm_vram_base / (512*1024*1024)) * 8;
		for (i = 0; i < dev_priv->vm_vram_pt_nr; i++) {
			nouveau_gpuobj_ref(dev_priv->vm_vram_pt[i],
					   &chan->vm_vram_pt[i]);

			nv_wo32(chan->vm_pd, pde + 0,
				chan->vm_vram_pt[i]->vinst | 0x61);
			nv_wo32(chan->vm_pd, pde + 4, 0x00000000);
			pde += 8;
		}

		instmem->flush(dev);
	}

	/* RAMHT */
	if (dev_priv->card_type < NV_50) {
		nouveau_ramht_ref(dev_priv->ramht, &chan->ramht, NULL);
	} else {
		struct nouveau_gpuobj *ramht = NULL;

		ret = nouveau_gpuobj_new(dev, chan, 0x8000, 16,
					 NVOBJ_FLAG_ZERO_ALLOC, &ramht);
		if (ret)
			return ret;

		ret = nouveau_ramht_new(dev, ramht, &chan->ramht);
		nouveau_gpuobj_ref(NULL, &ramht);
		if (ret)
			return ret;
	}

	/* VRAM ctxdma */
	if (dev_priv->card_type >= NV_50) {
		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
					     0, dev_priv->vm_end,
					     NV_DMA_ACCESS_RW,
					     NV_DMA_TARGET_AGP, &vram);
		if (ret) {
			NV_ERROR(dev, "Error creating VRAM ctxdma: %d\n", ret);
			return ret;
		}
	} else {
		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
					     0, dev_priv->fb_available_size,
					     NV_DMA_ACCESS_RW,
					     NV_DMA_TARGET_VIDMEM, &vram);
		if (ret) {
			NV_ERROR(dev, "Error creating VRAM ctxdma: %d\n", ret);
			return ret;
		}
	}

	ret = nouveau_ramht_insert(chan, vram_h, vram);
	nouveau_gpuobj_ref(NULL, &vram);
	if (ret) {
		NV_ERROR(dev, "Error adding VRAM ctxdma to RAMHT: %d\n", ret);
		return ret;
	}

	/* TT memory ctxdma */
	if (dev_priv->card_type >= NV_50) {
		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
					     0, dev_priv->vm_end,
					     NV_DMA_ACCESS_RW,
					     NV_DMA_TARGET_AGP, &tt);
		if (ret) {
			NV_ERROR(dev, "Error creating VRAM ctxdma: %d\n", ret);
			return ret;
		}
	} else
	if (dev_priv->gart_info.type != NOUVEAU_GART_NONE) {
		ret = nouveau_gpuobj_gart_dma_new(chan, 0,
						  dev_priv->gart_info.aper_size,
						  NV_DMA_ACCESS_RW, &tt, NULL);
	} else {
		NV_ERROR(dev, "Invalid GART type %d\n", dev_priv->gart_info.type);
		ret = -EINVAL;
	}

	if (ret) {
		NV_ERROR(dev, "Error creating TT ctxdma: %d\n", ret);
		return ret;
	}

	ret = nouveau_ramht_insert(chan, tt_h, tt);
	nouveau_gpuobj_ref(NULL, &tt);
	if (ret) {
		NV_ERROR(dev, "Error adding TT ctxdma to RAMHT: %d\n", ret);
		return ret;
	}

	return 0;
}

void
nouveau_gpuobj_channel_takedown(struct nouveau_channel *chan)
{
	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
	struct drm_device *dev = chan->dev;
	int i;

	NV_DEBUG(dev, "ch%d\n", chan->id);

	if (!chan->ramht)
		return;

	nouveau_ramht_ref(NULL, &chan->ramht, chan);

	nouveau_gpuobj_ref(NULL, &chan->vm_pd);
	nouveau_gpuobj_ref(NULL, &chan->vm_gart_pt);
	for (i = 0; i < dev_priv->vm_vram_pt_nr; i++)
		nouveau_gpuobj_ref(NULL, &chan->vm_vram_pt[i]);

	if (chan->ramin_heap.free_stack.next)
		drm_mm_takedown(&chan->ramin_heap);
	nouveau_gpuobj_ref(NULL, &chan->ramin);
}

int
nouveau_gpuobj_suspend(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_gpuobj *gpuobj;
	int i;

	if (dev_priv->card_type < NV_50) {
		dev_priv->susres.ramin_copy = vmalloc(dev_priv->ramin_rsvd_vram);
		if (!dev_priv->susres.ramin_copy)
			return -ENOMEM;

		for (i = 0; i < dev_priv->ramin_rsvd_vram; i += 4)
			dev_priv->susres.ramin_copy[i/4] = nv_ri32(dev, i);
		return 0;
	}

	list_for_each_entry(gpuobj, &dev_priv->gpuobj_list, list) {
		if (!gpuobj->im_backing)
			continue;

		gpuobj->im_backing_suspend = vmalloc(gpuobj->size);
		if (!gpuobj->im_backing_suspend) {
			nouveau_gpuobj_resume(dev);
			return -ENOMEM;
		}

		for (i = 0; i < gpuobj->size; i += 4)
			gpuobj->im_backing_suspend[i/4] = nv_ro32(gpuobj, i);
	}

	return 0;
}

void
nouveau_gpuobj_suspend_cleanup(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_gpuobj *gpuobj;

	if (dev_priv->card_type < NV_50) {
		vfree(dev_priv->susres.ramin_copy);
		dev_priv->susres.ramin_copy = NULL;
		return;
	}

	list_for_each_entry(gpuobj, &dev_priv->gpuobj_list, list) {
		if (!gpuobj->im_backing_suspend)
			continue;

		vfree(gpuobj->im_backing_suspend);
		gpuobj->im_backing_suspend = NULL;
	}
}

void
nouveau_gpuobj_resume(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_gpuobj *gpuobj;
	int i;

	if (dev_priv->card_type < NV_50) {
		for (i = 0; i < dev_priv->ramin_rsvd_vram; i += 4)
			nv_wi32(dev, i, dev_priv->susres.ramin_copy[i/4]);
		nouveau_gpuobj_suspend_cleanup(dev);
		return;
	}

	list_for_each_entry(gpuobj, &dev_priv->gpuobj_list, list) {
		if (!gpuobj->im_backing_suspend)
			continue;

		for (i = 0; i < gpuobj->size; i += 4)
			nv_wo32(gpuobj, i, gpuobj->im_backing_suspend[i/4]);
		dev_priv->engine.instmem.flush(dev);
	}

	nouveau_gpuobj_suspend_cleanup(dev);
}

int nouveau_ioctl_grobj_alloc(struct drm_device *dev, void *data,
			      struct drm_file *file_priv)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct drm_nouveau_grobj_alloc *init = data;
	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
	struct nouveau_pgraph_object_class *grc;
	struct nouveau_gpuobj *gr = NULL;
	struct nouveau_channel *chan;
	int ret;

	if (init->handle == ~0)
		return -EINVAL;

	grc = pgraph->grclass;
	while (grc->id) {
		if (grc->id == init->class)
			break;
		grc++;
	}

	if (!grc->id) {
		NV_ERROR(dev, "Illegal object class: 0x%x\n", init->class);
		return -EPERM;
	}

	chan = nouveau_channel_get(dev, file_priv, init->channel);
	if (IS_ERR(chan))
		return PTR_ERR(chan);

	if (nouveau_ramht_find(chan, init->handle)) {
		ret = -EEXIST;
		goto out;
	}

	if (!grc->software)
		ret = nouveau_gpuobj_gr_new(chan, grc->id, &gr);
	else
		ret = nouveau_gpuobj_sw_new(chan, grc->id, &gr);
	if (ret) {
		NV_ERROR(dev, "Error creating object: %d (%d/0x%08x)\n",
			 ret, init->channel, init->handle);
		goto out;
	}

	ret = nouveau_ramht_insert(chan, init->handle, gr);
	nouveau_gpuobj_ref(NULL, &gr);
	if (ret) {
		NV_ERROR(dev, "Error referencing object: %d (%d/0x%08x)\n",
			 ret, init->channel, init->handle);
	}

out:
	nouveau_channel_put(&chan);
	return ret;
}

int nouveau_ioctl_gpuobj_free(struct drm_device *dev, void *data,
			      struct drm_file *file_priv)
{
	struct drm_nouveau_gpuobj_free *objfree = data;
	struct nouveau_gpuobj *gpuobj;
	struct nouveau_channel *chan;
	int ret = -ENOENT;

	chan = nouveau_channel_get(dev, file_priv, objfree->channel);
	if (IS_ERR(chan))
		return PTR_ERR(chan);

	gpuobj = nouveau_ramht_find(chan, objfree->handle);
	if (gpuobj) {
		nouveau_ramht_remove(chan, objfree->handle);
		ret = 0;
	}

	nouveau_channel_put(&chan);
	return ret;
}

u32
nv_ro32(struct nouveau_gpuobj *gpuobj, u32 offset)
{
	struct drm_nouveau_private *dev_priv = gpuobj->dev->dev_private;
	struct drm_device *dev = gpuobj->dev;

	if (gpuobj->pinst == ~0 || !dev_priv->ramin_available) {
		u64  ptr = gpuobj->vinst + offset;
		u32 base = ptr >> 16;
		u32  val;

		spin_lock(&dev_priv->ramin_lock);
		if (dev_priv->ramin_base != base) {
			dev_priv->ramin_base = base;
			nv_wr32(dev, 0x001700, dev_priv->ramin_base);
		}
		val = nv_rd32(dev, 0x700000 + (ptr & 0xffff));
		spin_unlock(&dev_priv->ramin_lock);
		return val;
	}

	return nv_ri32(dev, gpuobj->pinst + offset);
}

void
nv_wo32(struct nouveau_gpuobj *gpuobj, u32 offset, u32 val)
{
	struct drm_nouveau_private *dev_priv = gpuobj->dev->dev_private;
	struct drm_device *dev = gpuobj->dev;

	if (gpuobj->pinst == ~0 || !dev_priv->ramin_available) {
		u64  ptr = gpuobj->vinst + offset;
		u32 base = ptr >> 16;

		spin_lock(&dev_priv->ramin_lock);
		if (dev_priv->ramin_base != base) {
			dev_priv->ramin_base = base;
			nv_wr32(dev, 0x001700, dev_priv->ramin_base);
		}
		nv_wr32(dev, 0x700000 + (ptr & 0xffff), val);
		spin_unlock(&dev_priv->ramin_lock);
		return;
	}

	nv_wi32(dev, gpuobj->pinst + offset, val);
}