Commit 2527083c authored by Michael Ellerman

Merge tag 'powerpc-4.5-4' into next

Pull in our current fixes from 4.5; in particular, the bug addressed by
"Fix Multi hit ERAT" is causing folks some grief when testing next.
parents a4c3f909 9ab3ac23
...@@ -557,7 +557,7 @@ choice ...@@ -557,7 +557,7 @@ choice
config PPC_4K_PAGES config PPC_4K_PAGES
bool "4k page size" bool "4k page size"
select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
config PPC_16K_PAGES config PPC_16K_PAGES
bool "16k page size" bool "16k page size"
...@@ -566,7 +566,7 @@ config PPC_16K_PAGES ...@@ -566,7 +566,7 @@ config PPC_16K_PAGES
config PPC_64K_PAGES config PPC_64K_PAGES
bool "64k page size" bool "64k page size"
depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64) depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64)
select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
config PPC_256K_PAGES config PPC_256K_PAGES
bool "256k page size" bool "256k page size"
......
...@@ -281,6 +281,10 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); ...@@ -281,6 +281,10 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp); pmd_t *pmdp);
#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
extern void pmdp_huge_split_prepare(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
#define pmd_move_must_withdraw pmd_move_must_withdraw #define pmd_move_must_withdraw pmd_move_must_withdraw
struct spinlock; struct spinlock;
static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
......
...@@ -81,6 +81,7 @@ struct pci_dn; ...@@ -81,6 +81,7 @@ struct pci_dn;
#define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */ #define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */
#define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */ #define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */
#define EEH_PE_REMOVED (1 << 10) /* Removed permanently */ #define EEH_PE_REMOVED (1 << 10) /* Removed permanently */
#define EEH_PE_PRI_BUS (1 << 11) /* Cached primary bus */
struct eeh_pe { struct eeh_pe {
int type; /* PE type: PHB/Bus/Device */ int type; /* PE type: PHB/Bus/Device */
......
...@@ -57,12 +57,14 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit, ...@@ -57,12 +57,14 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit,
extern void hcall_tracepoint_regfunc(void); extern void hcall_tracepoint_regfunc(void);
extern void hcall_tracepoint_unregfunc(void); extern void hcall_tracepoint_unregfunc(void);
TRACE_EVENT_FN(hcall_entry, TRACE_EVENT_FN_COND(hcall_entry,
TP_PROTO(unsigned long opcode, unsigned long *args), TP_PROTO(unsigned long opcode, unsigned long *args),
TP_ARGS(opcode, args), TP_ARGS(opcode, args),
TP_CONDITION(cpu_online(raw_smp_processor_id())),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(unsigned long, opcode) __field(unsigned long, opcode)
), ),
...@@ -76,13 +78,15 @@ TRACE_EVENT_FN(hcall_entry, ...@@ -76,13 +78,15 @@ TRACE_EVENT_FN(hcall_entry,
hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
); );
TRACE_EVENT_FN(hcall_exit, TRACE_EVENT_FN_COND(hcall_exit,
TP_PROTO(unsigned long opcode, unsigned long retval, TP_PROTO(unsigned long opcode, unsigned long retval,
unsigned long *retbuf), unsigned long *retbuf),
TP_ARGS(opcode, retval, retbuf), TP_ARGS(opcode, retval, retbuf),
TP_CONDITION(cpu_online(raw_smp_processor_id())),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(unsigned long, opcode) __field(unsigned long, opcode)
__field(unsigned long, retval) __field(unsigned long, retval)
......
...@@ -418,8 +418,7 @@ static void *eeh_rmv_device(void *data, void *userdata) ...@@ -418,8 +418,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
eeh_pcid_put(dev); eeh_pcid_put(dev);
if (driver->err_handler && if (driver->err_handler &&
driver->err_handler->error_detected && driver->err_handler->error_detected &&
driver->err_handler->slot_reset && driver->err_handler->slot_reset)
driver->err_handler->resume)
return NULL; return NULL;
} }
...@@ -564,6 +563,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) ...@@ -564,6 +563,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
*/ */
eeh_pe_state_mark(pe, EEH_PE_KEEP); eeh_pe_state_mark(pe, EEH_PE_KEEP);
if (bus) { if (bus) {
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
pci_lock_rescan_remove(); pci_lock_rescan_remove();
pcibios_remove_pci_devices(bus); pcibios_remove_pci_devices(bus);
pci_unlock_rescan_remove(); pci_unlock_rescan_remove();
...@@ -803,6 +803,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) ...@@ -803,6 +803,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
* the their PCI config any more. * the their PCI config any more.
*/ */
if (frozen_bus) { if (frozen_bus) {
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
pci_lock_rescan_remove(); pci_lock_rescan_remove();
...@@ -886,6 +887,7 @@ static void eeh_handle_special_event(void) ...@@ -886,6 +887,7 @@ static void eeh_handle_special_event(void)
continue; continue;
/* Notify all devices to be down */ /* Notify all devices to be down */
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
bus = eeh_pe_bus_get(phb_pe); bus = eeh_pe_bus_get(phb_pe);
eeh_pe_dev_traverse(pe, eeh_pe_dev_traverse(pe,
eeh_report_failure, NULL); eeh_report_failure, NULL);
......
...@@ -928,7 +928,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) ...@@ -928,7 +928,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
bus = pe->phb->bus; bus = pe->phb->bus;
} else if (pe->type & EEH_PE_BUS || } else if (pe->type & EEH_PE_BUS ||
pe->type & EEH_PE_DEVICE) { pe->type & EEH_PE_DEVICE) {
if (pe->bus) { if (pe->state & EEH_PE_PRI_BUS) {
bus = pe->bus; bus = pe->bus;
goto out; goto out;
} }
......
...@@ -340,7 +340,7 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab) ...@@ -340,7 +340,7 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
if (name[0] == '.') { if (name[0] == '.') {
if (strcmp(name+1, "TOC.") == 0) if (strcmp(name+1, "TOC.") == 0)
syms[i].st_shndx = SHN_ABS; syms[i].st_shndx = SHN_ABS;
memmove(name, name+1, strlen(name)); syms[i].st_name++;
} }
} }
} }
......
...@@ -111,7 +111,13 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, ...@@ -111,7 +111,13 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
*/ */
if (!(old_pte & _PAGE_COMBO)) { if (!(old_pte & _PAGE_COMBO)) {
flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags); flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
old_pte &= ~_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND; /*
* Clear the old slot details from the old and new pte.
* On hash insert failure we use the old pte value, and we don't
* want slot information there if we have an insert failure.
*/
old_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
new_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
goto htab_insert_hpte; goto htab_insert_hpte;
} }
/* /*
......
...@@ -78,9 +78,19 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, ...@@ -78,9 +78,19 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
* base page size. This is because demote_segment won't flush * base page size. This is because demote_segment won't flush
* hash page table entries. * hash page table entries.
*/ */
if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) {
flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K,
ssize, flags); ssize, flags);
/*
* With THP, we also clear the slot information with
* respect to all the 64K hash pte mapping the 16MB
* page. They are all invalid now. This makes sure we
* don't find the slot valid when we fault with 4k
* base page size.
*
*/
memset(hpte_slot_array, 0, PTE_FRAG_SIZE);
}
} }
valid = hpte_valid(hpte_slot_array, index); valid = hpte_valid(hpte_slot_array, index);
......
...@@ -646,6 +646,28 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) ...@@ -646,6 +646,28 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
return pgtable; return pgtable;
} }
/*
 * Prepare a transparent huge pmd for being split.
 *
 * The pmd cannot be marked none here, because that would race against
 * exit_mmap; it has to stay marked TRANS HUGE while the split is in
 * progress. At the same time the rest of the ppc64 code must not insert
 * hash ptes for it, because the caller is about to modify the deposited
 * pgtable. Clearing _PAGE_USER pushes fault handling up to the higher
 * level function, which serializes against the ptl.
 *
 * Existing hash pte entries need to be flushed here even though the
 * translation is still valid, because the pgtable_t is withdrawn right
 * after this call.
 *
 * @vma:     vma whose mm owns the huge pmd
 * @address: address covered by the pmd; must be HPAGE_PMD aligned and lie
 *           in the user region (both are checked with VM_BUG_ON)
 * @pmdp:    the huge pmd entry to update
 */
void pmdp_huge_split_prepare(struct vm_area_struct *vma,
			     unsigned long address, pmd_t *pmdp)
{
	struct mm_struct *mm = vma->vm_mm;

	/* Sanity checks: aligned to a huge page and inside the user region. */
	VM_BUG_ON((address & ~HPAGE_PMD_MASK) != 0);
	VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);

	/* Clear _PAGE_USER, set nothing. */
	pmd_hugepage_update(mm, address, pmdp, _PAGE_USER, 0);
}
/* /*
* set a new huge pmd. We should not be called for updating * set a new huge pmd. We should not be called for updating
* an existing pmd entry. That should go via pmd_hugepage_update. * an existing pmd entry. That should go via pmd_hugepage_update.
...@@ -663,10 +685,20 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, ...@@ -663,10 +685,20 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
} }
/*
* We use this to invalidate a pmdp entry before switching from a
* hugepte to regular pmd entry.
*/
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp) pmd_t *pmdp)
{ {
pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0); pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
/*
* This ensures that generic code that relies on IRQ disabling
* to prevent a parallel THP split works as expected.
*/
kick_all_cpus_sync();
} }
/* /*
......
...@@ -428,9 +428,12 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) ...@@ -428,9 +428,12 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
* PCI devices of the PE are expected to be removed prior * PCI devices of the PE are expected to be removed prior
* to PE reset. * to PE reset.
*/ */
if (!edev->pe->bus) if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
edev->pe->bus = pci_find_bus(hose->global_number, edev->pe->bus = pci_find_bus(hose->global_number,
pdn->busno); pdn->busno);
if (edev->pe->bus)
edev->pe->state |= EEH_PE_PRI_BUS;
}
/* /*
* Enable EEH explicitly so that we will do EEH check * Enable EEH explicitly so that we will do EEH check
......
...@@ -3211,6 +3211,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose) ...@@ -3211,6 +3211,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup, .dma_dev_setup = pnv_pci_dma_dev_setup,
.dma_bus_setup = pnv_pci_dma_bus_setup,
#ifdef CONFIG_PCI_MSI #ifdef CONFIG_PCI_MSI
.setup_msi_irqs = pnv_setup_msi_irqs, .setup_msi_irqs = pnv_setup_msi_irqs,
.teardown_msi_irqs = pnv_teardown_msi_irqs, .teardown_msi_irqs = pnv_teardown_msi_irqs,
......
...@@ -596,6 +596,9 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages, ...@@ -596,6 +596,9 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
u64 rpn = __pa(uaddr) >> tbl->it_page_shift; u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
long i; long i;
if (proto_tce & TCE_PCI_WRITE)
proto_tce |= TCE_PCI_READ;
for (i = 0; i < npages; i++) { for (i = 0; i < npages; i++) {
unsigned long newtce = proto_tce | unsigned long newtce = proto_tce |
((rpn + i) << tbl->it_page_shift); ((rpn + i) << tbl->it_page_shift);
...@@ -617,6 +620,9 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index, ...@@ -617,6 +620,9 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index,
BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
if (newtce & TCE_PCI_WRITE)
newtce |= TCE_PCI_READ;
oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)); oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce));
*hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE); *hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE);
*direction = iommu_tce_direction(oldtce); *direction = iommu_tce_direction(oldtce);
...@@ -757,6 +763,26 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev) ...@@ -757,6 +763,26 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
phb->dma_dev_setup(phb, pdev); phb->dma_dev_setup(phb, pdev);
} }
/*
 * Re-point the cached bus of a bus-type PE at @bus.
 *
 * Walks the PHB's IODA PE list and considers only PEs flagged
 * PNV_IODA_PE_BUS or PNV_IODA_PE_BUS_ALL that already have a pbus
 * recorded. The first such PE whose RID bus number (bits 15:8 of
 * pe->rid) equals bus->number gets its pbus replaced with @bus, and the
 * walk stops there. If no PE matches, nothing is changed.
 */
void pnv_pci_dma_bus_setup(struct pci_bus *bus)
{
	struct pci_controller *controller = bus->sysdata;
	struct pnv_phb *phb = controller->private_data;
	struct pnv_ioda_pe *ioda_pe;

	list_for_each_entry(ioda_pe, &phb->ioda.pe_list, list) {
		/* Skip PEs that are not bus PEs or have no bus cached yet. */
		if ((ioda_pe->flags &
		     (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) == 0 ||
		    ioda_pe->pbus == NULL)
			continue;

		/* Skip PEs whose RID bus number does not match this bus. */
		if (((ioda_pe->rid >> 8) & 0xFF) != bus->number)
			continue;

		ioda_pe->pbus = bus;
		break;
	}
}
void pnv_pci_shutdown(void) void pnv_pci_shutdown(void)
{ {
struct pci_controller *hose; struct pci_controller *hose;
......
...@@ -232,6 +232,7 @@ extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); ...@@ -232,6 +232,7 @@ extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev); extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev);
extern void pnv_pci_dma_bus_setup(struct pci_bus *bus);
extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
......
...@@ -239,6 +239,14 @@ extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, ...@@ -239,6 +239,14 @@ extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp); pmd_t *pmdp);
#endif #endif
#ifndef __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
/*
 * Generic fallback, used when the architecture does not define
 * __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE. It is an empty inline: no
 * preparation is done on the huge pmd before it is split.
 */
static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
}
#endif
#ifndef __HAVE_ARCH_PTE_SAME #ifndef __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b) static inline int pte_same(pte_t pte_a, pte_t pte_b)
{ {
......
...@@ -2860,6 +2860,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -2860,6 +2860,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
young = pmd_young(*pmd); young = pmd_young(*pmd);
dirty = pmd_dirty(*pmd); dirty = pmd_dirty(*pmd);
pmdp_huge_split_prepare(vma, haddr, pmd);
pgtable = pgtable_trans_huge_withdraw(mm, pmd); pgtable = pgtable_trans_huge_withdraw(mm, pmd);
pmd_populate(mm, &_pmd, pgtable); pmd_populate(mm, &_pmd, pgtable);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment