Commit c745a8a1 authored by Chris Metcalf

arch/tile: Various cleanups.

This change rolls up random cleanups not representing any actual bugs.

- Remove a stale CONFIG_ value from the default tile_defconfig
- Remove unused tns_atomic_xxx() family of methods from <asm/atomic.h>
- Optimize get_order() using Tile's "clz" instruction (see the sketch below)
- Fix a bad hypervisor upcall name (not currently used in Linux anyway)
- Use __copy_in_user_inatomic() name for consistency, and export it
- Export some additional hypervisor driver I/O upcalls and some homecache calls
- Remove the obfuscating MEMCPY_TEST_WH64 support code
- Other stray comment cleanups, #if 0 removal, etc.
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
parent 1fcbe027
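
As background for the get_order() bullet above, here is a standalone user-space C sketch (not part of the commit; PAGE_SHIFT and the test sizes are assumptions for illustration) checking that the single clz-based expression the patch adds for tile agrees with a straightforward loop-based reference:

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12                            /* assumed 4 KB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define BITS_PER_LONG ((int)(8 * sizeof(long)))

/* Loop-based reference: count the bits needed for the rounded-up page count. */
static int get_order_loop(unsigned long size)
{
	int order = 0;

	size = (size - 1) >> PAGE_SHIFT;
	while (size) {
		order++;
		size >>= 1;
	}
	return order;
}

/* clz-based form, as added by the patch: one count-leading-zeros step. */
static int get_order_clz(unsigned long size)
{
	return BITS_PER_LONG - __builtin_clzl((size - 1) >> PAGE_SHIFT);
}

int main(void)
{
	/* Sizes strictly larger than one page, so __builtin_clzl() never sees zero. */
	unsigned long sizes[] = {
		PAGE_SIZE + 1, 2 * PAGE_SIZE, 4 * PAGE_SIZE, 123456, 1UL << 20
	};

	for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		assert(get_order_loop(sizes[i]) == get_order_clz(sizes[i]));
		printf("get_order(%lu) = %d\n", sizes[i], get_order_clz(sizes[i]));
	}
	return 0;
}

Sizes of one page or less are skipped in the sketch because they feed zero into __builtin_clzl(), which the tile "clz" instruction defines but ISO C leaves undefined.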
@@ -231,7 +231,6 @@ CONFIG_HARDWALL=y
 CONFIG_MEMPROF=y
 CONFIG_XGBE=y
 CONFIG_NET_TILE=y
-CONFIG_PSEUDO_NAPI=y
 CONFIG_TILEPCI_ENDP=y
 CONFIG_TILEPCI_HOST_SUBSET=m
 CONFIG_TILE_IDE_GPIO=y
...
@@ -255,43 +255,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
 #define smp_mb__after_atomic_dec()	do { } while (0)
 #define smp_mb__after_atomic_inc()	do { } while (0)
 
-/*
- * Support "tns" atomic integers. These are atomic integers that can
- * hold any value but "1". They are more efficient than regular atomic
- * operations because the "lock" (aka acquire) step is a single "tns"
- * in the uncontended case, and the "unlock" (aka release) step is a
- * single "store" without an mf. (However, note that on tilepro the
- * "tns" will evict the local cache line, so it's not all upside.)
- *
- * Note that you can ONLY observe the value stored in the pointer
- * using these operations; a direct read of the value may confusingly
- * return the special value "1".
- */
-int __tns_atomic_acquire(atomic_t *);
-void __tns_atomic_release(atomic_t *p, int v);
-
-static inline void tns_atomic_set(atomic_t *v, int i)
-{
-	__tns_atomic_acquire(v);
-	__tns_atomic_release(v, i);
-}
-
-static inline int tns_atomic_cmpxchg(atomic_t *v, int o, int n)
-{
-	int ret = __tns_atomic_acquire(v);
-	__tns_atomic_release(v, (ret == o) ? n : ret);
-	return ret;
-}
-
-static inline int tns_atomic_xchg(atomic_t *v, int n)
-{
-	int ret = __tns_atomic_acquire(v);
-	__tns_atomic_release(v, n);
-	return ret;
-}
-
 #endif /* !__ASSEMBLY__ */
 
 /*
...
@@ -129,6 +129,11 @@ static inline u64 pmd_val(pmd_t pmd)
 
 #endif
 
+static inline __attribute_const__ int get_order(unsigned long size)
+{
+	return BITS_PER_LONG - __builtin_clzl((size - 1) >> PAGE_SHIFT);
+}
+
 #endif /* !__ASSEMBLY__ */
 
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
@@ -332,7 +337,6 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
 	(VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/getorder.h>
 
 #endif /* __KERNEL__ */
...
@@ -389,14 +389,14 @@ static inline unsigned long __must_check copy_from_user(void *to,
  * Returns number of bytes that could not be copied.
  * On success, this will be zero.
  */
-extern unsigned long __copy_in_user_asm(
+extern unsigned long __copy_in_user_inatomic(
 	void __user *to, const void __user *from, unsigned long n);
 
 static inline unsigned long __must_check
 __copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {
 	might_sleep();
-	return __copy_in_user_asm(to, from, n);
+	return __copy_in_user_inatomic(to, from, n);
 }
 
 static inline unsigned long __must_check
...
@@ -532,11 +532,11 @@ void hv_disable_intr(HV_IntrMask disab_mask);
  */
 void hv_clear_intr(HV_IntrMask clear_mask);
 
-/** Assert a set of device interrupts.
+/** Raise a set of device interrupts.
  *
- * @param assert_mask Bitmap of interrupts to clear.
+ * @param raise_mask Bitmap of interrupts to raise.
  */
-void hv_assert_intr(HV_IntrMask assert_mask);
+void hv_raise_intr(HV_IntrMask raise_mask);
 
 /** Trigger a one-shot interrupt on some tile
  *
@@ -1712,7 +1712,7 @@ typedef struct
  * @param cache_control This argument allows you to specify a length of
  *   physical address space to flush (maximum HV_FLUSH_MAX_CACHE_LEN).
  *   You can "or" in HV_FLUSH_EVICT_L2 to flush the whole L2 cache.
- *   You can "or" in HV_FLUSH_EVICT_LI1 to flush the whole LII cache.
+ *   You can "or" in HV_FLUSH_EVICT_L1I to flush the whole L1I cache.
  *   HV_FLUSH_ALL flushes all caches.
  * @param cache_cpumask Bitmask (in row-major order, supervisor-relative) of
  *   tile indices to perform cache flush on. The low bit of the first
...
@@ -7,7 +7,9 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o \
 	memcpy_$(BITS).o memchr_$(BITS).o memmove_$(BITS).o memset_$(BITS).o \
 	strchr_$(BITS).o strlen_$(BITS).o
 
-ifneq ($(CONFIG_TILEGX),y)
+ifeq ($(CONFIG_TILEGX),y)
+lib-y += memcpy_user_64.o
+else
 lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o
 endif
...
@@ -36,21 +36,29 @@ EXPORT_SYMBOL(clear_user_asm);
 EXPORT_SYMBOL(current_text_addr);
 EXPORT_SYMBOL(dump_stack);
 
-/* arch/tile/lib/__memcpy.S */
+/* arch/tile/lib/, various memcpy files */
+/* NOTE: on TILE64, these symbols appear in arch/tile/lib/memcpy_tile64.c */
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(__copy_to_user_inatomic);
 EXPORT_SYMBOL(__copy_from_user_inatomic);
 EXPORT_SYMBOL(__copy_from_user_zeroing);
+#ifdef __tilegx__
+EXPORT_SYMBOL(__copy_in_user_inatomic);
+#endif
 
 /* hypervisor glue */
 #include <hv/hypervisor.h>
 EXPORT_SYMBOL(hv_dev_open);
 EXPORT_SYMBOL(hv_dev_pread);
 EXPORT_SYMBOL(hv_dev_pwrite);
+EXPORT_SYMBOL(hv_dev_preada);
+EXPORT_SYMBOL(hv_dev_pwritea);
+EXPORT_SYMBOL(hv_dev_poll);
+EXPORT_SYMBOL(hv_dev_poll_cancel);
 EXPORT_SYMBOL(hv_dev_close);
+EXPORT_SYMBOL(hv_sysconf);
+EXPORT_SYMBOL(hv_confstr);
 
-/* -ltile-cc */
+/* libgcc.a */
 uint32_t __udivsi3(uint32_t dividend, uint32_t divisor);
 EXPORT_SYMBOL(__udivsi3);
 int32_t __divsi3(int32_t dividend, int32_t divisor);
@@ -70,8 +78,6 @@ EXPORT_SYMBOL(__moddi3);
 #ifndef __tilegx__
 uint64_t __ll_mul(uint64_t n0, uint64_t n1);
 EXPORT_SYMBOL(__ll_mul);
-#endif
-#ifndef __tilegx__
 int64_t __muldi3(int64_t, int64_t);
 EXPORT_SYMBOL(__muldi3);
 uint64_t __lshrdi3(uint64_t, unsigned int);
...
@@ -17,10 +17,6 @@
 
 #include <arch/chip.h>
 
-#if CHIP_HAS_WH64() || defined(MEMCPY_TEST_WH64)
-#define MEMCPY_USE_WH64
-#endif
-
 #include <linux/linkage.h>
@@ -160,7 +156,7 @@ EX:	{ sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
 	{ addi r3, r1, 60; andi r9, r9, -64 }
 
-#ifdef MEMCPY_USE_WH64
+#if CHIP_HAS_WH64()
 	/* No need to prefetch dst, we'll just do the wh64
 	 * right before we copy a line.
 	 */
@@ -173,7 +169,7 @@ EX:	{ lw r6, r3; addi r3, r3, 64 }
 	/* Intentionally stall for a few cycles to leave L2 cache alone. */
 	{ bnzt zero, . }
 EX:	{ lw r7, r3; addi r3, r3, 64 }
-#ifndef MEMCPY_USE_WH64
+#if !CHIP_HAS_WH64()
 	/* Prefetch the dest */
 	/* Intentionally stall for a few cycles to leave L2 cache alone. */
 	{ bnzt zero, . }
@@ -288,15 +284,7 @@ EX:	{ lw r7, r3; addi r3, r3, 64 }
 	/* Fill second L1D line. */
 EX:	{ lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */
 
-#ifdef MEMCPY_TEST_WH64
-	/* Issue a fake wh64 that clobbers the destination words
-	 * with random garbage, for testing.
-	 */
-	{ movei r19, 64; crc32_32 r10, r2, r9 }
-.Lwh64_test_loop:
-EX:	{ sw r9, r10; addi r9, r9, 4; addi r19, r19, -4 }
-	{ bnzt r19, .Lwh64_test_loop; crc32_32 r10, r10, r19 }
-#elif CHIP_HAS_WH64()
+#if CHIP_HAS_WH64()
 	/* Prepare destination line for writing. */
 EX:	{ wh64 r9; addi r9, r9, 64 }
 #else
@@ -340,7 +328,7 @@ EX:	{ lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */
EX:	{ sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */
EX:	{ sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */
EX:	{ sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */
-#ifdef MEMCPY_USE_WH64
+#if CHIP_HAS_WH64()
EX:	{ sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */
 #else
 	/* Back up the r9 to a cache line we are already storing to
...
@@ -141,7 +141,6 @@ void *memset(void *s, int c, size_t n)
 			 */
 			__insn_prefetch(&out32[ahead32]);
 
-#if 1
 #if CACHE_LINE_SIZE_IN_WORDS % 4 != 0
 #error "Unhandled CACHE_LINE_SIZE_IN_WORDS"
 #endif
@@ -157,30 +156,6 @@ void *memset(void *s, int c, size_t n)
 				*out32++ = v32;
 				*out32++ = v32;
 			}
-#else
-			/* Unfortunately, due to a code generator flaw this
-			 * allocates a separate register for each of these
-			 * stores, which requires a large number of spills,
-			 * which makes this procedure enormously bigger
-			 * (something like 70%)
-			 */
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			n32 -= 16;
-#endif
 
 			/* To save compiled code size, reuse this loop even
 			 * when we run out of prefetching to do by dropping
...
@@ -567,6 +567,14 @@ static int handle_page_fault(struct pt_regs *regs,
  * since that might indicate we have not yet squirreled the SPR
  * contents away and can thus safely take a recursive interrupt.
  * Accordingly, the hypervisor passes us the PC via SYSTEM_SAVE_1_2.
+ *
+ * Note that this routine is called before homecache_tlb_defer_enter(),
+ * which means that we can properly unlock any atomics that might
+ * be used there (good), but also means we must be very sensitive
+ * to not touch any data structures that might be located in memory
+ * that could migrate, as we could be entering the kernel on a dataplane
+ * cpu that has been deferring kernel TLB updates. This means, for
+ * example, that we can't migrate init_mm or its pgd.
  */
 struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
 				      unsigned long address,
...
@@ -29,6 +29,7 @@
 #include <linux/timex.h>
 #include <linux/cache.h>
 #include <linux/smp.h>
+#include <linux/module.h>
 
 #include <asm/page.h>
 #include <asm/sections.h>
@@ -348,6 +349,7 @@ pte_t pte_set_home(pte_t pte, int home)
 
 	return pte;
 }
+EXPORT_SYMBOL(pte_set_home);
 
 /*
  * The routines in this section are the "static" versions of the normal
@@ -403,6 +405,7 @@ struct page *homecache_alloc_pages(gfp_t gfp_mask,
 	homecache_change_page_home(page, order, home);
 	return page;
 }
+EXPORT_SYMBOL(homecache_alloc_pages);
 
 struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
 					unsigned int order, int home)
...