Commit eacd0e95 authored by Vineet Gupta's avatar Vineet Gupta

ARC: [mm] Lazy D-cache flush (non aliasing VIPT)

flush_dcache_page( ) is MM hook to ensure that a page has consistent
views between kernel and userspace. Thus it is called when

* kernel writes to a page which at some later point could get mapped to
  userspace (so kernel mapping needs to be flushed-n-inv)
* kernel is about to read from a page with possible userspace mappings
  (so userspace mappings needs to be made coherent with kernel ones)

However for Non aliasing VIPT dcache, any userspace mapping will always
be congruent to kernel mapping. Thus d-cache need need not be flushed at
all (or delayed indefinitely).

The only reason it does need to be flushed is when mapping code pages.
Since icache doesn't snoop dcache, those dirty dcache lines need to be
written back to memory and icache line invalidated so that icache lines
fetch will get the right data.

Decent gains on LMBench fork/exec/sh and File I/O micro-benchmarks.

(1) FPGA @ 80 MHZ

Processor, Processes - times in microseconds - smaller is better
------------------------------------------------------------------------------
Host                 OS  Mhz null null      open slct sig  sig  fork exec sh
                             call  I/O stat clos TCP  inst hndl proc proc proc
--------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
3.9-rc6-a Linux 3.9.0-r   80 4.79 8.72 66.7 116. 239. 8.39 30.4 4798 14.K 34.K
3.9-rc6-b Linux 3.9.0-r   80 4.79 8.62 65.4 111. 239. 8.35 29.0 3995 12.K 30.K
3.9-rc7-c Linux 3.9.0-r   80 4.79 9.00 66.1 106. 239. 8.61 30.4 2858 10.K 24.K
                                                                ^^^^ ^^^^ ^^^

File & VM system latencies in microseconds - smaller is better
-------------------------------------------------------------------------------
Host                 OS   0K File      10K File     Mmap    Prot   Page 100fd
                        Create Delete Create Delete Latency Fault  Fault selct
--------- ------------- ------ ------ ------ ------ ------- ----- ------- -----
3.9-rc6-a Linux 3.9.0-r  317.8  204.2 1122.3  375.1 3522.0 4.288     20.7 126.8
3.9-rc6-b Linux 3.9.0-r  298.7  223.0 1141.6  367.8 3531.0 4.866     20.9 126.4
3.9-rc7-c Linux 3.9.0-r  278.4  179.2  862.1  339.3 3705.0 3.223     20.3 126.6
                         ^^^^^  ^^^^^  ^^^^^  ^^^^

(2) Customer Silicon @ 500 MHz (166 MHz mem)

------------------------------------------------------------------------------
Host                 OS  Mhz null null      open slct sig  sig  fork exec sh
                             call  I/O stat clos TCP  inst hndl proc proc proc
--------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
abilis-ba Linux 3.9.0-r  497 0.71 1.38 4.58 12.0 35.5 1.40 3.89 2070 5525 13.K
abilis-ca Linux 3.9.0-r  497 0.71 1.40 4.61 11.8 35.6 1.37 3.92 1411 4317 10.K
                                                                ^^^^ ^^^^ ^^^
Signed-off-by: default avatarVineet Gupta <vgupta@synopsys.com>
parent 764531cc
...@@ -33,6 +33,7 @@ void flush_cache_all(void); ...@@ -33,6 +33,7 @@ void flush_cache_all(void);
void flush_icache_range(unsigned long start, unsigned long end); void flush_icache_range(unsigned long start, unsigned long end);
void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len); void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len);
void __inv_icache_page(unsigned long paddr, unsigned long vaddr); void __inv_icache_page(unsigned long paddr, unsigned long vaddr);
void __flush_dcache_page(unsigned long paddr);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
......
...@@ -457,10 +457,10 @@ static void __ic_line_inv_vaddr(unsigned long phy_start, unsigned long vaddr, ...@@ -457,10 +457,10 @@ static void __ic_line_inv_vaddr(unsigned long phy_start, unsigned long vaddr,
* Exported APIs * Exported APIs
*/ */
/* TBD: use pg_arch_1 to optimize this */
void flush_dcache_page(struct page *page) void flush_dcache_page(struct page *page)
{ {
__dc_line_op((unsigned long)page_address(page), PAGE_SIZE, OP_FLUSH); /* Make a note that dcache is not yet flushed for this page */
set_bit(PG_arch_1, &page->flags);
} }
EXPORT_SYMBOL(flush_dcache_page); EXPORT_SYMBOL(flush_dcache_page);
...@@ -570,6 +570,11 @@ void __inv_icache_page(unsigned long paddr, unsigned long vaddr) ...@@ -570,6 +570,11 @@ void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
__ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE); __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
} }
void __flush_dcache_page(unsigned long paddr)
{
__dc_line_op(paddr, PAGE_SIZE, OP_FLUSH_N_INV);
}
void flush_icache_all(void) void flush_icache_all(void)
{ {
unsigned long flags; unsigned long flags;
......
...@@ -418,9 +418,10 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) ...@@ -418,9 +418,10 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
local_irq_restore(flags); local_irq_restore(flags);
} }
/* arch hook called by core VM at the end of handle_mm_fault( ), /*
* when a new PTE is entered in Page Tables or an existing one * Called at the end of pagefault, for a userspace mapped page
* is modified. We aggresively pre-install a TLB entry * -pre-install the corresponding TLB entry into MMU
* -Finalize the delayed D-cache flush (wback+inv kernel mapping)
*/ */
void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
pte_t *ptep) pte_t *ptep)
...@@ -431,9 +432,16 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, ...@@ -431,9 +432,16 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
/* icache doesn't snoop dcache, thus needs to be made coherent here */ /* icache doesn't snoop dcache, thus needs to be made coherent here */
if (vma->vm_flags & VM_EXEC) { if (vma->vm_flags & VM_EXEC) {
struct page *page = pfn_to_page(pte_pfn(*ptep));
/* if page was dcache dirty, flush now */
int dirty = test_and_clear_bit(PG_arch_1, &page->flags);
if (dirty) {
unsigned long paddr = pte_val(*ptep) & PAGE_MASK; unsigned long paddr = pte_val(*ptep) & PAGE_MASK;
__flush_dcache_page(paddr);
__inv_icache_page(paddr, vaddr); __inv_icache_page(paddr, vaddr);
} }
}
} }
/* Read the Cache Build Confuration Registers, Decode them and save into /* Read the Cache Build Confuration Registers, Decode them and save into
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment