diff options
Diffstat (limited to 'target/linux/orion/patches/002-feroceon__speed_up_flushing_of_the_entire_cache.patch')
-rw-r--r-- | target/linux/orion/patches/002-feroceon__speed_up_flushing_of_the_entire_cache.patch | 117 |
1 files changed, 117 insertions, 0 deletions
diff --git a/target/linux/orion/patches/002-feroceon__speed_up_flushing_of_the_entire_cache.patch b/target/linux/orion/patches/002-feroceon__speed_up_flushing_of_the_entire_cache.patch
new file mode 100644
index 0000000000..c2efe5be22
--- /dev/null
+++ b/target/linux/orion/patches/002-feroceon__speed_up_flushing_of_the_entire_cache.patch
@@ -0,0 +1,117 @@
+Flushing the L1 D cache with a test/clean/invalidate loop is very
+easy in software, but it is not the quickest way of doing it, as
+there is a lot of overhead involved in re-scanning the cache from
+the beginning every time we hit a dirty line.
+
+This patch makes proc-feroceon.S use "clean+invalidate by set/way"
+loops according to possible cache configuration of Feroceon CPUs
+(either direct-mapped or 4-way set associative).
+
+[nico: optimized the assembly a bit]
+
+Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
+Signed-off-by: Nicolas Pitre <nico@marvell.com>
+---
+ arch/arm/mm/proc-feroceon.S | 53 ++++++++++++++++++++++++++++++++++---------
+ 1 files changed, 42 insertions(+), 11 deletions(-)
+
+--- a/arch/arm/mm/proc-feroceon.S
++++ b/arch/arm/mm/proc-feroceon.S
+@@ -44,11 +44,31 @@
+  */
+ #define CACHE_DLINESIZE	32
+ 
++	.bss
++	.align 3
++__cache_params_loc:
++	.space	8
++
+ 	.text
++__cache_params:
++	.word	__cache_params_loc
++
+ /*
+  * cpu_feroceon_proc_init()
+  */
+ ENTRY(cpu_feroceon_proc_init)
++	mrc	p15, 0, r0, c0, c0, 1		@ read cache type register
++	ldr	r1, __cache_params
++	mov	r2, #(16 << 5)
++	tst	r0, #(1 << 16)			@ get way
++	mov	r0, r0, lsr #18			@ get cache size order
++	movne	r3, #((4 - 1) << 30)		@ 4-way
++	and	r0, r0, #0xf
++	moveq	r3, #0				@ 1-way
++	mov	r2, r2, lsl r0			@ actual cache size
++	movne	r2, r2, lsr #2			@ turned into # of sets
++	sub	r2, r2, #(1 << 5)
++	stmia	r1, {r2, r3}
+ 	mov	pc, lr
+ 
+ /*
+@@ -117,11 +137,19 @@
+  */
+ ENTRY(feroceon_flush_kern_cache_all)
+ 	mov	r2, #VM_EXEC
+-	mov	ip, #0
++
+ __flush_whole_cache:
+-1:	mrc	p15, 0, r15, c7, c14, 3		@ test,clean,invalidate
+-	bne	1b
++	ldr	r1, __cache_params
++	ldmia	r1, {r1, r3}
++1:	orr	ip, r1, r3
++2:	mcr	p15, 0, ip, c7, c14, 2		@ clean + invalidate D set/way
++	subs	ip, ip, #(1 << 30)		@ next way
++	bcs	2b
++	subs	r1, r1, #(1 << 5)		@ next set
++	bcs	1b
++
+ 	tst	r2, #VM_EXEC
++	mov	ip, #0
+ 	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+ 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+ 	mov	pc, lr
+@@ -138,7 +166,6 @@
+  */
+ 	.align	5
+ ENTRY(feroceon_flush_user_cache_range)
+-	mov	ip, #0
+ 	sub	r3, r1, r0			@ calculate total size
+ 	cmp	r3, #CACHE_DLIMIT
+ 	bgt	__flush_whole_cache
+@@ -152,6 +179,7 @@
+ 	cmp	r0, r1
+ 	blo	1b
+ 	tst	r2, #VM_EXEC
++	mov	ip, #0
+ 	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+ 	mov	pc, lr
+ 
+@@ -306,16 +334,19 @@
+ 	.align	5
+ ENTRY(cpu_feroceon_switch_mm)
+ #ifdef CONFIG_MMU
+-	mov	ip, #0
+-@ && 'Clean & Invalidate whole DCache'
+-1:	mrc	p15, 0, r15, c7, c14, 3		@ test,clean,invalidate
+-	bne	1b
+-	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+-	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
++	mov	r2, lr				@ abuse r2 to preserve lr
++	bl	__flush_whole_cache
++	@ if r2 contains the VM_EXEC bit then the next 2 ops are done already
++	tst	r2, #VM_EXEC
++	mcreq	p15, 0, ip, c7, c5, 0		@ invalidate I cache
++	mcreq	p15, 0, ip, c7, c10, 4		@ drain WB
++
+ 	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+ 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+-#endif
++	mov	pc, r2
++#else
+ 	mov	pc, lr
++#endif
+ 
+ /*
+  * cpu_feroceon_set_pte_ext(ptep, pte, ext)