/*
 * Copyright 2001-2003 SuSE Labs.
 * Distributed under the GNU public license, v2.
 *
 * This is a GART driver for the AMD Opteron/Athlon64 on-CPU northbridge.
 * It also includes support for the AMD 8151 AGP bridge,
 * although it doesn't actually do much, as all the real
 * work is done in the northbridge(s).
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/agp_backend.h>
#include "agp.h"

/* Will need to be increased if AMD64 ever goes >8-way. */
#ifdef CONFIG_SMP
#define MAX_HAMMER_GARTS   8
#else
#define MAX_HAMMER_GARTS   1
#endif

/* PTE bits. */
#define GPTE_VALID	1
#define GPTE_COHERENT	2

/* Aperture control register bits. */
#define GARTEN		(1<<0)
#define DISGARTCPU	(1<<4)
#define DISGARTIO	(1<<5)

/* GART cache control register bits. */
#define INVGART		(1<<0)
#define GARTPTEERR	(1<<1)

/* Number of valid entries in hammers[]. */
static int nr_garts;
/* Cached pci_devs of the northbridges, filled in by cache_nbs(). */
static struct pci_dev * hammers[MAX_HAMMER_GARTS];

static int __initdata agp_try_unsupported;

/*
 * Walks every cached northbridge.  The loop index is the file-scope
 * variable gart_iterator, so uses of for_each_nb() must not be nested.
 */
static int gart_iterator;
#define for_each_nb() for(gart_iterator=0;gart_iterator<nr_garts;gart_iterator++)

/*
 * Set the INVGART bit in the GART cache control register of one
 * northbridge (read-modify-write of AMD64_GARTCACHECTL).
 */
static void flush_amd64_tlb(struct pci_dev *dev)
{
	u32 tmp;

	pci_read_config_dword (dev, AMD64_GARTCACHECTL, &tmp);
	tmp |= INVGART;
	pci_write_config_dword (dev, AMD64_GARTCACHECTL, tmp);
}

/*
 * tlb_flush hook: flush the GART cache of every cached northbridge.
 * The agp_memory argument is not used.
 */
static void amd64_tlbflush(struct agp_memory *temp)
{
	for_each_nb()
		flush_amd64_tlb(hammers[gart_iterator]);
}

/*
 * Enter the pages of @mem into the GATT, starting at entry @pg_start.
 *
 * Returns 0 on success, -EINVAL if @type or mem->type is non-zero or the
 * range does not fit into the table, and -EBUSY if any target entry is
 * already in use.
 */
static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
{
	int i, j, num_entries;
	unsigned long first, limit;
	long tmp;
	u32 pte;

	num_entries = agp_num_entries();

	if (type != 0 || mem->type != 0)
		return -EINVAL;

	/*
	 * Make sure we can fit the range in the gatt table.  The test is
	 * phrased as "count > room left" rather than "start + count > size"
	 * so that a huge start or count cannot wrap the sum and slip through.
	 */
	if (pg_start < 0)
		return -EINVAL;
	first = (unsigned long)pg_start;
	limit = (unsigned long)num_entries;
	if (first > limit || mem->page_count > limit - first)
		return -EINVAL;

	/* gatt table should be empty. */
	for (j = pg_start; j < (pg_start + mem->page_count); j++) {
		if (!PGE_EMPTY(agp_bridge, agp_bridge->gatt_table[j]))
			return -EBUSY;
	}

	if (mem->is_flushed == FALSE) {
		global_cache_flush();
		mem->is_flushed = TRUE;
	}

	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
		tmp = agp_bridge->driver->mask_memory(mem->memory[i], mem->type);

		/* Only address bits 39:12 (plus the two low flag bits) may be set. */
		BUG_ON(tmp & 0xffffff0000000ffc);
		/* Address bits 39:32 are stored in PTE bits 11:4 ... */
		pte = (tmp & 0x000000ff00000000) >> 28;
		/* ... and address bits 31:12 stay where they are. */
		pte |=(tmp & 0x00000000fffff000);
		pte |= GPTE_VALID | GPTE_COHERENT;

		agp_bridge->gatt_table[j] = pte;
	}
	amd64_tlbflush(mem);
	return 0;
}

/*
 * This hack alters the order element according
 * to the size of a long. It sucks. I totally disown this, even
 * though it does appear to work for the most part.
 *
 * Fields: size, number of entries, page order, and the value found in
 * bits 3:1 of the aperture control register for that size.
 */
static struct aper_size_info_32 amd64_aperture_sizes[7] =
{
	{32,   8192,   3+(sizeof(long)/8), 0 },
	{64,   16384,  4+(sizeof(long)/8), 1<<1 },
	{128,  32768,  5+(sizeof(long)/8), 1<<2 },
	{256,  65536,  6+(sizeof(long)/8), 1<<1 | 1<<2 },
	{512,  131072, 7+(sizeof(long)/8), 1<<3 },
	{1024, 262144, 8+(sizeof(long)/8), 1<<1 | 1<<3},
	{2048, 524288, 9+(sizeof(long)/8), 1<<2 | 1<<3}
};

/*
 * Get the current Aperture size from the x86-64.
 * Note, that there may be multiple x86-64's, but we just return
 * the value from the first one we find. The set_size functions
 * keep the rest coherent anyway. Or at least should do.
 *
 * On a match this records the table entry as the bridge's current and
 * previous size and returns its size field; returns 0 when there is no
 * cached northbridge or the register value matches no table entry.
 */
static int amd64_fetch_size(void)
{
	struct pci_dev *dev;
	int i;
	u32 temp;
	struct aper_size_info_32 *values;

	dev = hammers[0];
	if (dev==NULL)
		return 0;

	pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &temp);
	/* Keep only bits 3:1, which the size_value column is matched against. */
	temp = (temp & 0xe);
	values = A_SIZE_32(amd64_aperture_sizes);

	for (i = 0; i < agp_bridge->driver->num_aperture_sizes; i++) {
		if (temp == values[i].size_value) {
			agp_bridge->previous_size =
			    agp_bridge->current_size = (void *) (values + i);

			agp_bridge->aperture_size_idx = i;
			return values[i].size;
		}
	}
	return 0;
}

/*
 * In a multiprocessor x86-64 system, this function gets
 * called once for each CPU.
*/ static u64 amd64_configure (struct pci_dev *hammer, u64 gatt_table) { u64 aperturebase; u32 tmp; u64 addr, aper_base; /* Address to map to */ pci_read_config_dword (hammer, AMD64_GARTAPERTUREBASE, &tmp); aperturebase = tmp << 25; aper_base = (aperturebase & PCI_BASE_ADDRESS_MEM_MASK); /* address of the mappings table */ addr = (u64) gatt_table; addr >>= 12; tmp = (u32) addr<<4; tmp &= ~0xf; pci_write_config_dword (hammer, AMD64_GARTTABLEBASE, tmp); /* Enable GART translation for this hammer. */ pci_read_config_dword(hammer, AMD64_GARTAPERTURECTL, &tmp); tmp |= GARTEN; tmp &= ~(DISGARTCPU | DISGARTIO); pci_write_config_dword(hammer, AMD64_GARTAPERTURECTL, tmp); /* keep CPU's coherent. */ flush_amd64_tlb (hammer); return aper_base; } static struct aper_size_info_32 amd_8151_sizes[7] = { {2048, 524288, 9, 0x00000000 }, /* 0 0 0 0 0 0 */ {1024, 262144, 8, 0x00000400 }, /* 1 0 0 0 0 0 */ {512, 131072, 7, 0x00000600 }, /* 1 1 0 0 0 0 */ {256, 65536, 6, 0x00000700 }, /* 1 1 1 0 0 0 */ {128, 32768, 5, 0x00000720 }, /* 1 1 1 1 0 0 */ {64, 16384, 4, 0x00000730 }, /* 1 1 1 1 1 0 */ {32, 8192, 3, 0x00000738 } /* 1 1 1 1 1 1 */ }; static int amd_8151_configure(void) { unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real); /* Configure AGP regs in each x86-64 host bridge. 
*/ for_each_nb() { agp_bridge->gart_bus_addr = amd64_configure(hammers[gart_iterator],gatt_bus); } return 0; } static void amd64_cleanup(void) { u32 tmp; for_each_nb() { /* disable gart translation */ pci_read_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, &tmp); tmp &= ~AMD64_GARTEN; pci_write_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, tmp); } } struct agp_bridge_driver amd_8151_driver = { .owner = THIS_MODULE, .aperture_sizes = amd_8151_sizes, .size_type = U32_APER_SIZE, .num_aperture_sizes = 7, .configure = amd_8151_configure, .fetch_size = amd64_fetch_size, .cleanup = amd64_cleanup, .tlb_flush = amd64_tlbflush, .mask_memory = agp_generic_mask_memory, .masks = NULL, .agp_enable = agp_generic_enable, .cache_flush = global_cache_flush, .create_gatt_table = agp_generic_create_gatt_table, .free_gatt_table = agp_generic_free_gatt_table, .insert_memory = amd64_insert_memory, .remove_memory = agp_generic_remove_memory, .alloc_by_type = agp_generic_alloc_by_type, .free_by_type = agp_generic_free_by_type, .agp_alloc_page = agp_generic_alloc_page, .agp_destroy_page = agp_generic_destroy_page, }; /* Some basic sanity checks for the aperture. */ static int __init aperture_valid(u64 aper, u32 size) { static int not_first_call; u32 pfn, c; if (aper == 0) { printk(KERN_ERR PFX "No aperture\n"); return 0; } if (size < 32*1024*1024) { printk(KERN_ERR PFX "Aperture too small (%d MB)\n", size>>20); return 0; } if (aper + size > 0xffffffff) { printk(KERN_ERR PFX "Aperture out of bounds\n"); return 0; } pfn = aper >> PAGE_SHIFT; for (c = 0; c < size/PAGE_SIZE; c++) { if (!pfn_valid(pfn + c)) break; if (!PageReserved(pfn_to_page(pfn + c))) { printk(KERN_ERR PFX "Aperture pointing to RAM\n"); return 0; } } /* Request the Aperture. This catches cases when someone else already put a mapping in there - happens with some very broken BIOS Maybe better to use pci_assign_resource/pci_enable_device instead trusting the bridges? 
*/ if (!not_first_call && request_mem_region(aper, size, "aperture") < 0) { printk(KERN_ERR PFX "Aperture conflicts with PCI mapping.\n"); return 0; } not_first_call = 1; return 1; } /* * W*s centric BIOS sometimes only set up the aperture in the AGP * bridge, not the northbridge. On AMD64 this is handled early * in aperture.c, but when GART_IOMMU is not enabled or we run * on a 32bit kernel this needs to be redone. * Unfortunately it is impossible to fix the aperture here because it's too late * to allocate that much memory. But at least error out cleanly instead of * crashing. */ static __init int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp, u16 cap) { u32 aper_low, aper_hi; u64 aper, nb_aper; int order = 0; u32 nb_order, nb_base; u16 apsize; pci_read_config_dword(nb, 0x90, &nb_order); nb_order = (nb_order >> 1) & 7; pci_read_config_dword(nb, 0x94, &nb_base); nb_aper = nb_base << 25; if (aperture_valid(nb_aper, (32*1024*1024)<<nb_order)) { return 0; } /* Northbridge seems to contain crap. Try the AGP bridge. */ pci_read_config_word(agp, cap+0x14, &apsize); if (apsize == 0xffff) return -1; apsize &= 0xfff; /* Some BIOS use weird encodings not in the AGPv3 table. */ if (apsize & 0xff) apsize |= 0xf00; order = 7 - hweight16(apsize); pci_read_config_dword(agp, 0x10, &aper_low); pci_read_config_dword(agp, 0x14, &aper_hi); aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); printk(KERN_INFO PFX "Aperture from AGP @ %Lx size %u MB\n", aper, 32 << order); if (order < 0 || !aperture_valid(aper, (32*1024*1024)<<order)) return -1; pci_write_config_dword(nb, 0x90, order << 1); pci_write_config_dword(nb, 0x94, aper >> 25); return 0; } static __init int cache_nbs (struct pci_dev *pdev, u32 cap_ptr) { struct pci_dev *loop_dev = NULL; int i = 0; /* cache pci_devs of northbridges. 
*/ while ((loop_dev = pci_find_device(PCI_VENDOR_ID_AMD, 0x1103, loop_dev)) != NULL) { if (fix_northbridge(loop_dev, pdev, cap_ptr) < 0) { printk(KERN_INFO PFX "No usable aperture found.\n"); #ifdef __x86_64__ /* should port this to i386 */ printk(KERN_INFO PFX "Consider rebooting with iommu=memaper=2 to get a good aperture.\n"); #endif return -1; } hammers[i++] = loop_dev; nr_garts = i; if (i == MAX_HAMMER_GARTS) { printk(KERN_INFO PFX "Too many northbridges for AGP\n"); return -1; } } return i == 0 ? -1 : 0; } static int __init agp_amd64_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct agp_bridge_data *bridge; u8 rev_id; u8 cap_ptr; char *revstring=NULL; cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP); if (!cap_ptr) return -ENODEV; /* Could check for AGPv3 here */ bridge = agp_alloc_bridge(); if (!bridge) return -ENOMEM; if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == PCI_DEVICE_ID_AMD_8151_0) { pci_read_config_byte(pdev, PCI_REVISION_ID, &rev_id); switch (rev_id) { case 0x01: revstring="A0"; break; case 0x02: revstring="A1"; break; case 0x11: revstring="B0"; break; case 0x12: revstring="B1"; break; case 0x13: revstring="B2"; break; default: revstring="??"; break; } printk (KERN_INFO PFX "Detected AMD 8151 AGP Bridge rev %s\n", revstring); /* * Work around errata. 
* Chips before B2 stepping incorrectly reporting v3.5 */ if (rev_id < 0x13) { printk (KERN_INFO PFX "Correcting AGP revision (reports 3.5, is really 3.0)\n"); bridge->major_version = 3; bridge->minor_version = 0; } } else { printk(KERN_INFO PFX "Detected AGP bridge %x\n", pdev->devfn); } bridge->driver = &amd_8151_driver; bridge->dev = pdev; bridge->capndx = cap_ptr; /* Fill in the mode register */ pci_read_config_dword(pdev, bridge->capndx+PCI_AGP_STATUS, &bridge->mode); if (cache_nbs(pdev, cap_ptr) == -1) { agp_put_bridge(bridge); return -ENODEV; } pci_set_drvdata(pdev, bridge); return agp_add_bridge(bridge); } static void __devexit agp_amd64_remove(struct pci_dev *pdev) { struct agp_bridge_data *bridge = pci_get_drvdata(pdev); release_mem_region(virt_to_phys(bridge->gatt_table_real), amd64_aperture_sizes[bridge->aperture_size_idx].size); agp_remove_bridge(bridge); agp_put_bridge(bridge); } static struct pci_device_id agp_amd64_pci_table[] = { { .class = (PCI_CLASS_BRIDGE_HOST << 8), .class_mask = ~0, .vendor = PCI_VENDOR_ID_AMD, .device = PCI_DEVICE_ID_AMD_8151_0, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, }, /* VIA K8T800 */ { .class = (PCI_CLASS_BRIDGE_HOST << 8), .class_mask = ~0, .vendor = PCI_VENDOR_ID_VIA, .device = PCI_DEVICE_ID_VIA_8385_0, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, }, /* VIA K8M800 / K8N800 */ { .class = (PCI_CLASS_BRIDGE_HOST << 8), .class_mask = ~0, .vendor = PCI_VENDOR_ID_VIA, .device = PCI_DEVICE_ID_VIA_8380_0, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, }, { .class = (PCI_CLASS_BRIDGE_HOST << 8), .class_mask = ~0, .vendor = PCI_VENDOR_ID_SI, .device = PCI_DEVICE_ID_SI_755, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, }, { } }; MODULE_DEVICE_TABLE(pci, agp_amd64_pci_table); static struct pci_driver agp_amd64_pci_driver = { .name = "agpgart-amd64", .id_table = agp_amd64_pci_table, .probe = agp_amd64_probe, .remove = agp_amd64_remove, }; /* Not static due to IOMMU code calling it early. 
*/ int __init agp_amd64_init(void) { int err = 0; if (agp_off) return -EINVAL; if (pci_module_init(&agp_amd64_pci_driver) > 0) { struct pci_dev *dev; if (!agp_try_unsupported && !agp_try_unsupported_boot) { printk(KERN_INFO PFX "No supported AGP bridge found.\n"); #ifdef MODULE printk(KERN_INFO PFX "You can try agp_try_unsupported=1\n"); #else printk(KERN_INFO PFX "You can boot with agp=try_unsupported\n"); #endif return -ENODEV; } /* First check that we have at least one AMD64 NB */ if (!pci_find_device(PCI_VENDOR_ID_AMD, 0x1103, NULL)) return -ENODEV; /* Look for any AGP bridge */ dev = NULL; err = -ENODEV; while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev))) { if (!pci_find_capability(dev, PCI_CAP_ID_AGP)) continue; /* Only one bridge supported right now */ if (agp_amd64_probe(dev, NULL) == 0) { err = 0; break; } } } return err; } static void __exit agp_amd64_cleanup(void) { pci_unregister_driver(&agp_amd64_pci_driver); } /* On AMD64 the PCI driver needs to initialize this driver early for the IOMMU, so it has to be called via a backdoor. */ #ifndef CONFIG_GART_IOMMU module_init(agp_amd64_init); module_exit(agp_amd64_cleanup); #endif MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>, Andi Kleen"); MODULE_PARM(agp_try_unsupported, "1i"); MODULE_LICENSE("GPL and additional rights");