diff -drupN a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S --- a/arch/arm64/mm/cache.S 2018-08-06 17:23:04.000000000 +0300 +++ b/arch/arm64/mm/cache.S 2022-06-12 05:28:14.000000000 +0300 @@ -23,6 +23,79 @@ #include #include #include +#include +/* + * __flush_dcache_all() + * + * Flush the whole D-cache. + * + * Corrupted registers: x0-x7, x9-x11 + */ +__flush_dcache_all: + dmb sy // ensure ordering with previous memory accesses + mrs x0, clidr_el1 // read clidr + and x3, x0, #0x7000000 // extract loc from clidr + lsr x3, x3, #23 // left align loc bit field + cbz x3, finished // if loc is 0, then no need to clean + mov x10, #0 // start clean at cache level 0 +loop1: + add x2, x10, x10, lsr #1 // work out 3x current cache level + lsr x1, x0, x2 // extract cache type bits from clidr + and x1, x1, #7 // mask of the bits for current cache only + cmp x1, #2 // see what cache we have at this level + b.lt skip // skip if no cache, or just i-cache + save_and_disable_irq x9 // make CSSELR and CCSIDR access atomic + msr csselr_el1, x10 // select current cache level in csselr + isb // isb to sych the new cssr&csidr + mrs x1, ccsidr_el1 // read the new ccsidr + restore_irq x9 + and x2, x1, #7 // extract the length of the cache lines + add x2, x2, #4 // add 4 (line length offset) + mov x4, #0x3ff + and x4, x4, x1, lsr #3 // find maximum number on the way size + clz w5, w4 // find bit position of way size increment + mov x7, #0x7fff + and x7, x7, x1, lsr #13 // extract max number of the index size +loop2: + mov x9, x4 // create working copy of max way size +loop3: + lsl x6, x9, x5 + orr x11, x10, x6 // factor way and cache number into x11 + lsl x6, x7, x2 + orr x11, x11, x6 // factor index number into x11 + dc cisw, x11 // clean & invalidate by set/way + subs x9, x9, #1 // decrement the way + b.ge loop3 + subs x7, x7, #1 // decrement the index + b.ge loop2 +skip: + add x10, x10, #2 // increment cache number + cmp x3, x10 + b.gt loop1 +finished: + mov x10, #0 // swith back to cache level 0 + msr csselr_el1, x10 // select current cache level in csselr + dsb sy + isb + ret +ENDPROC(__flush_dcache_all) + +/* + * flush_cache_all() + * + * Flush the entire cache system. The data cache flush is now achieved + * using atomic clean / invalidates working outwards from L1 cache. This + * is done using Set/Way based cache maintainance instructions. The + * instruction cache can still be invalidated back to the point of + * unification in a single instruction. + */ +ENTRY(flush_cache_all) + mov x12, lr + bl __flush_dcache_all + mov x0, #0 + ic ialluis // I+BTB cache invalidate + ret x12 +ENDPROC(flush_cache_all) /* * flush_icache_range(start,end) @@ -48,6 +121,7 @@ ENTRY(flush_icache_range) * - end - virtual end address of region */ ENTRY(__flush_cache_user_range) + uaccess_ttbr0_enable x2, x3, x4 dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x0, x3 @@ -69,10 +143,12 @@ USER(9f, ic ivau, x4 ) // invalidate I dsb ish isb mov x0, #0 +1: + uaccess_ttbr0_disable x1, x2 ret 9: mov x0, #-EFAULT - ret + b 1b ENDPROC(flush_icache_range) ENDPROC(__flush_cache_user_range) @@ -198,3 +274,18 @@ ENTRY(__dma_unmap_area) b.ne __dma_inv_area ret ENDPIPROC(__dma_unmap_area) + +/* + * __dma_flush_range(start, size) + * + * clean & invalidate D / U line + * + * - start - virtual start address of region + * - size - size in question + */ +ENTRY(__dma_flush_range) + uaccess_ttbr0_enable x2, x3, x4 + dcache_by_line_op civac, sy, x0, x1, x2, x3 + uaccess_ttbr0_disable x1, x2 + ret +ENDPIPROC(__dma_flush_range)