[T106][ZXW-22]7520V3SCV2.01.01.02P42U09_VEC_V0.8_AP_VEC origin source commit
Change-Id: Ic6e05d89ecd62fc34f82b23dcf306c93764aec4b
diff --git a/boot/common/src/uboot/compress/head.S b/boot/common/src/uboot/compress/head.S
new file mode 100644
index 0000000..fedf779
--- /dev/null
+++ b/boot/common/src/uboot/compress/head.S
@@ -0,0 +1,446 @@
+/*******************************************************************************
+* Copyright (C) 2016, ZTE Corporation. All rights reserved.
+*
+* File name:  head.S
+* File ID:    head.S
+* Summary:    image decompression and boot startup code
+*
+* Date        Version  Tag     Author      Description
+* ------------------------------------------------------------------------------
+* 2016/09/12  V1.0     Create  Deng Ning   initial creation
+*
+*******************************************************************************/
+
+/*******************************************************************************
+* Header files *
+*******************************************************************************/
+
+/*******************************************************************************
+* Macro definitions *
+*******************************************************************************/
+#ifdef __thumb2__
+#define ARM(x...)             /* Thumb-2 build: drop ARM-only operand forms */
+#define THUMB(x...) x         /* ...and keep Thumb-only ones */
+#define W(instr) instr.w      /* force the 32-bit (wide) Thumb encoding */
+#else
+#define ARM(x...) x           /* ARM build: keep ARM-only operand forms */
+#define THUMB(x...)           /* ...and drop Thumb-only ones */
+#define W(instr) instr        /* plain encoding, no wide suffix */
+#endif
+#define ARM_BE8(x...)         /* BE8 (big-endian) support is not built in */
+
+#define END(name) \
+ .size name, .-name
+
+#define ENDPROC(name) \
+ .type name, %function; \
+ END(name)
+
+#define CYGOPT_HAL_ARM_MMU    /* enable the MMU-setup paths below */
+#define CONFIG_CPU_CP15       /* read the CPU ID from CP15 c0 */
+
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+#define CB_BITS 0x08          /* section descriptor C=1, B=0: write-through */
+#else
+#define CB_BITS 0x0c          /* section descriptor C=1, B=1: write-back */
+#endif
+
+/*******************************************************************************
+* External function declarations *
+*******************************************************************************/
+.extern decompress_kernel
+
+/*******************************************************************************
+* External variable declarations *
+*******************************************************************************/
+.extern image_start
+
+/*******************************************************************************
+* Global function implementations *
+*******************************************************************************/
+
+.section ".start", #alloc, #execinstr
+.align
+.arm @ Always enter in ARM state
+.global _start
+.type _start, function
+
+_start:
+ .rept 8
+ mov r0, r0 @ 8 NOPs at the entry point (presumably header/patch space - confirm)
+ .endr
+
+.text
+ /* switch to SVC mode with IRQs/FIQs masked */
+ mrs r0, cpsr
+ bic r0, #0x1f @ clear the mode field
+ orr r0, r0, #0xd3 @ SVC mode, I and F bits set (IRQ/FIQ masked)
+ bic r0, #(1<<8) /* unmask Asynchronous abort */
+ msr cpsr_cxsf, r0
+
+ /* Control Register Setup */
+ mrc p15, 0, r0, c1, c0, 0
+ bic r0, r0, #(1<<0) /* MMU disabled */
+ orr r0, r0, #(1<<1) /* Alignment fault checking enabled */
+ bic r0, r0, #(1<<2) /* Data Cache disabled */
+ orr r0, r0, #(1<<11) /* Branch prediction enabled */
+ bic r0, r0, #(1<<12) /* Instruction Cache disabled */
+ bic r0, r0, #(1<<13) /* USE VBAR to set the vector base address */
+ DSB /* Ensure all previous loads/stores have completed */
+ mcr p15, 0, r0, c1, c0, 0
+ ISB
+
+ adr r0, LC0 @ r0 = runtime address of LC0
+ ldmia r0, {r1, r2, r3, r6, r10, r11, r12} @ link-time values (see LC0 below)
+ ldr sp, [r0, #28] @ 8th LC0 word: link-time stack top
+
+ /*
+ * We might be running at a different address. We need
+ * to fix up various pointers.
+ */
+ sub r0, r0, r1 @ calculate the delta offset
+ add r2, r2, r0 @ __bss_start
+ add r3, r3, r0 @ __bss_end
+ add r6, r6, r0 @ _edata
+ add r10, r10, r0 @ inflated kernel size location
+ add r11, r11, r0 @ got_start
+ add r12, r12, r0 @ got_end
+ add sp, sp, r0 @ sp
+
+ /* r4 = load/entry address of the decompressed kernel */
+ ldr r4, =image_start
+ add r4, r4, r0 @ relocate the pointer by the same delta
+ ldr r4, [r4]
+
+ /*
+ * The kernel build system appends the size of the
+ * decompressed kernel at the end of the compressed data
+ * in little-endian form.
+ */
+ ldrb r9, [r10, #0]
+ ldrb lr, [r10, #1]
+ orr r9, r9, lr, lsl #8
+ ldrb lr, [r10, #2]
+ ldrb r10, [r10, #3]
+ orr r9, r9, lr, lsl #16
+ orr r9, r9, r10, lsl #24 @ r9 = inflated kernel size
+
+ add r10, r4, r9 @ r10 = end address of the inflated kernel
+ adr r9, _clear_bss
+ cmp r10, r9 @ would the inflated kernel overwrite this code?
+dead_loop:
+ bgt dead_loop @ yes: hang deliberately (unrecoverable overlap)
+
+ /*
+ * Relocate all entries in the GOT table.
+ * Bump bss entries to _edata + dtb size
+ */
+ mov r5, #0 @ no appended dtb here, so the bump size is zero
+1:
+ ldr r1, [r11, #0] @ relocate entries in the GOT
+ add r1, r1, r0 @ This fixes up C references
+ cmp r1, r2 @ if entry >= bss_start &&
+ cmphs r3, r1 @ bss_end > entry
+ addhi r1, r1, r5 @ entry += dtb size
+ str r1, [r11], #4 @ next entry
+ cmp r11, r12
+ blo 1b
+
+ /* bump our bss pointers too */
+ add r2, r2, r5
+ add r3, r3, r5
+
+ /*
+ * Zero the BSS section
+ */
+ mov r0, #0
+_clear_bss:
+ str r0, [r2], #4
+ cmp r3, r2
+ bhi _clear_bss
+
+ bl cache_on @ turn MMU/caches on to speed up decompression
+ /*
+ * decompress kernel
+ */
+ mov r0, r4 @ arg0: output address (kernel load address)
+ mov r1, sp @ malloc space above stack
+ add r2, sp, #0x10000 @ 64k max
+ bl decompress_kernel
+ bl cache_clean_flush @ push the decompressed image out to memory
+ bl cache_off @ hand over with MMU and caches off
+
+ /*
+ * enter kernel
+ */
+ mov r0, #0
+ bx r4 @ jump to the decompressed kernel
+
+.align 2
+.type LC0, #object
+
+LC0:
+ .word LC0 @ r1
+ .word __bss_start @ r2
+ .word __bss_end @ r3
+ .word _edata @ r6
+ .word input_data_end - 4 @ r10 (inflated size location)
+ .word _got_start @ r11
+ .word _got_end @ ip
+ .word .L_user_stack_end @ sp
+.size LC0, . - LC0
+
+__setup_mmu: @ build a flat 1:1 section-mapped page table below the kernel
+ lsr r3, r4, #0x14
+ lsl r3, r3, #0x14 @ align the kernel address down to 1MB
+ sub r3, r3, #16384 @ Page directory size
+ bic r3, r3, #0xff @ Align the pointer
+ bic r3, r3, #0x3f00 @ ...down to a 16KB boundary
+/*
+ * Initialise the page tables, turning on the cacheable and bufferable
+ * bits for the RAM area only.
+ */
+ mov r0, r3 @ r0 walks the page table
+ mov r9, r0, lsr #18
+ mov r9, r9, lsl #18 @ start of RAM
+ add r10, r9, #0x10000000 @ a reasonable RAM size
+ mov r1, #0x12 @ XN|U + section mapping
+ orr r1, r1, #3 << 10 @ AP=11
+ add r2, r3, #16384 @ r2 = end of the 16KB table
+1: cmp r1, r9 @ if virt > start of RAM
+ cmphs r10, r1 @ && end of RAM > virt
+ bic r1, r1, #0x1c @ clear XN|U + C + B
+ orrlo r1, r1, #0x10 @ Set XN|U for non-RAM
+ orrhs r1, r1, r6 @ set RAM section settings
+ str r1, [r0], #4 @ 1:1 mapping
+ add r1, r1, #1048576 @ next 1MB section
+ teq r0, r2 @ table filled?
+ bne 1b
+/*
+ * If ever we are running from Flash, then we surely want the cache
+ * to be enabled also for our execution instance... We map 2MB of it
+ * so there is no map overlap problem for up to 1 MB compressed kernel.
+ * If the execution is in RAM then we would only be duplicating the above.
+ */
+ orr r1, r6, #0x04 @ ensure B is set for this
+ orr r1, r1, #3 << 10 @ AP=11 plus RAM attributes from r6
+ mov r2, pc
+ mov r2, r2, lsr #20 @ 1MB section number we execute from
+ orr r1, r1, r2, lsl #20
+ add r0, r3, r2, lsl #2
+ str r1, [r0], #4 @ map this MB...
+ add r1, r1, #1048576
+ str r1, [r0] @ ...and the next one
+ mov pc, lr
+ENDPROC(__setup_mmu)
+
+__armv7_mmu_cache_on: @ enable MMU, I-cache and branch prediction
+ mov r12, lr @ save return address (blne below clobbers lr)
+#ifdef CYGOPT_HAL_ARM_MMU
+ mrc p15, 0, r11, c0, c1, 4 @ read ID_MMFR0
+ tst r11, #0xf @ VMSA
+ movne r6, #CB_BITS | 0x02 @ !XN
+ blne __setup_mmu
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ tst r11, #0xf @ VMSA (re-test: flags lost across the bl)
+ mcrne p15, 0, r0, c8, c7, 0 @ flush I,D TLBs
+#endif
+ mrc p15, 0, r0, c1, c0, 0 @ read control reg
+ bic r0, r0, #1 << 28 @ clear SCTLR.TRE
+ orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement
+ orr r0, r0, #0x003c @ write buffer
+ bic r0, r0, #2 @ A (no unaligned access fault)
+ orr r0, r0, #1 << 22 @ U (v6 unaligned access model)
+ @ (needed for ARM1176)
+#ifdef CYGOPT_HAL_ARM_MMU
+ @ the 'ne' forms below still use the VMSA tst (nothing above sets flags)
+ ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables
+ mrcne p15, 0, r6, c2, c0, 2 @ read ttb control reg
+ orrne r0, r0, #1 @ MMU enabled
+ movne r1, #0xfffffffd @ domain 0 = client
+ bic r6, r6, #1 << 31 @ 32-bit translation system
+ bic r6, r6, #3 << 0 @ use only ttbr0
+ mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer (r3 from __setup_mmu)
+ mcrne p15, 0, r1, c3, c0, 0 @ load domain access control
+ mcrne p15, 0, r6, c2, c0, 2 @ load ttb control
+#endif
+ mcr p15, 0, r0, c7, c5, 4 @ ISB
+ mcr p15, 0, r0, c1, c0, 0 @ load control register
+ mrc p15, 0, r0, c1, c0, 0 @ and read it back
+ mov r0, #0
+ mcr p15, 0, r0, c7, c5, 4 @ ISB
+ mov pc, r12 @ return via the saved lr
+
+#define PROC_ENTRY_SIZE (4*5) /* per proc_types entry: 2 ID words + 3 branches */
+
+/* cache on */
+ .align 5
+cache_on: mov r3, #8 @ cache_on function
+ b call_cache_fn
+
+call_cache_fn: adr r12, proc_types @ r3 = byte offset of the method slot
+#ifdef CONFIG_CPU_CP15
+ mrc p15, 0, r9, c0, c0 @ get processor ID
+#elif defined(CONFIG_CPU_V7M)
+ /*
+ * On v7-M the processor id is located in the V7M_SCB_CPUID
+ * register, but as cache handling is IMPLEMENTATION DEFINED on
+ * v7-M (if existant at all) we just return early here.
+ * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
+ * __armv7_mmu_cache_{on,off,flush}) would be selected which
+ * use cp15 registers that are not implemented on v7-M.
+ */
+ bx lr
+#else
+ ldr r9, =CONFIG_PROCESSOR_ID
+#endif
+1: ldr r1, [r12, #0] @ get value
+ ldr r2, [r12, #4] @ get mask
+ eor r1, r1, r9 @ (real ^ match)
+ tst r1, r2 @ & mask
+ ARM( addeq pc, r12, r3 ) @ call cache function
+ THUMB( addeq r12, r3 )
+ THUMB( moveq pc, r12 ) @ call cache function
+ add r12, r12, #PROC_ENTRY_SIZE @ no match: try the next entry
+ b 1b
+
+/*
+ * Table for cache operations. This is basically:
+ * - CPU ID match
+ * - CPU ID mask
+ * - 'cache on' method instruction
+ * - 'cache off' method instruction
+ * - 'cache flush' method instruction
+ *
+ * We match an entry using: ((real_id ^ match) & mask) == 0
+ *
+ * Writethrough caches generally only need 'on' and 'off'
+ * methods. Writeback caches _must_ have the flush method
+ * defined.
+ */
+ .align 2
+ .type proc_types,#object
+proc_types:
+ .word 0x000f0000 @ new CPU Id
+ .word 0x000f0000 @ mask: only the architecture nibble is checked
+ W(b) __armv7_mmu_cache_on
+ W(b) __armv7_mmu_cache_off
+ W(b) __armv7_mmu_cache_flush
+
+ .word 0 @ unrecognised type
+ .word 0 @ mask 0 always matches: terminating catch-all entry
+ mov pc, lr @ 'on'    - no cache operation
+ THUMB( nop )
+ mov pc, lr @ 'off'   - no cache operation
+ THUMB( nop )
+ mov pc, lr @ 'flush' - no cache operation
+ THUMB( nop )
+
+ .size proc_types, . - proc_types
+
+ /*
+ * If you get a "non-constant expression in ".if" statement"
+ * error from the assembler on this line, check that you have
+ * not accidentally written a "b" instruction where you should
+ * have written W(b).
+ */
+ .if (. - proc_types) % PROC_ENTRY_SIZE != 0
+ .error "The size of one or more proc_types entries is wrong."
+ .endif
+
+ .align 5
+cache_off: mov r3, #12 @ cache_off function
+ b call_cache_fn
+
+__armv7_mmu_cache_off: @ disable MMU/D-cache, then flush everything
+ mrc p15, 0, r0, c1, c0 @ read SCTLR
+#ifdef CYGOPT_HAL_ARM_MMU
+ bic r0, r0, #0x000d @ clear M (MMU), C (D-cache), W
+#else
+ bic r0, r0, #0x000c @ clear C (D-cache), W only
+#endif
+ mcr p15, 0, r0, c1, c0 @ turn MMU and cache off
+ mov r12, lr @ save lr across the flush call
+ bl __armv7_mmu_cache_flush
+ mov r0, #0
+#ifdef CYGOPT_HAL_ARM_MMU
+ mcr p15, 0, r0, c8, c7, 0 @ invalidate whole TLB
+#endif
+ mcr p15, 0, r0, c7, c5, 6 @ invalidate BTC
+ mcr p15, 0, r0, c7, c10, 4 @ DSB
+ mcr p15, 0, r0, c7, c5, 4 @ ISB
+ mov pc, r12
+
+ .align 5
+cache_clean_flush:
+ mov r3, #16 @ offset of the 'flush' slot in proc_types
+ b call_cache_fn
+
+__armv7_mmu_cache_flush: @ clean+invalidate D by set/way, then invalidate I+BTB
+ tst r4, #1 @ NOTE(review): bit0 of r4 skips the D-side work - confirm intent
+ bne _iflush
+ mrc p15, 0, r10, c0, c1, 5 @ read ID_MMFR1
+ tst r10, #0xf << 16 @ hierarchical cache (ARMv7)
+ mov r10, #0
+ beq hierarchical
+ mcr p15, 0, r10, c7, c14, 0 @ clean+invalidate D
+ b _iflush
+hierarchical:
+ mcr p15, 0, r10, c7, c10, 5 @ DMB
+ stmfd sp!, {r0-r7, r9-r11} @ preserve working registers
+ mrc p15, 1, r0, c0, c0, 1 @ read CLIDR
+ ands r3, r0, #0x7000000 @ extract level of coherency (LoC)
+ mov r3, r3, lsr #23 @ r3 = LoC * 2
+ beq _finished @ no cache to flush
+ mov r10, #0 @ r10 = current level << 1
+_loop1:
+ add r2, r10, r10, lsr #1 @ r2 = level * 3: bit offset of Ctype in CLIDR
+ mov r1, r0, lsr r2 @ extract this level's cache type
+ and r1, r1, #7
+ cmp r1, #2 @ no data cache at this level?
+ blt _skip
+ mcr p15, 2, r10, c0, c0, 0 @ select level in CSSELR
+ mcr p15, 0, r10, c7, c5, 4 @ ISB to sync the selection
+ mrc p15, 1, r1, c0, c0, 0 @ read CCSIDR for this level
+ and r2, r1, #7 @ log2(line size) - 4
+ add r2, r2, #4 @ r2 = set-index shift
+ ldr r4, =0x3ff
+ ands r4, r4, r1, lsr #3 @ r4 = max way number (right-aligned)
+ clz r5, r4 @ r5 = bit position of ways in the operand
+ ldr r7, =0x7fff
+ ands r7, r7, r1, lsr #13 @ r7 = max set number (right-aligned)
+_loop2:
+ mov r9, r4 @ restart the way counter for this set
+_loop3:
+ ARM( orr r11, r10, r9, lsl r5 ) @ level | (way << r5)
+ ARM( orr r11, r11, r7, lsl r2 ) @ ... | (set << r2)
+ THUMB( lsl r6, r9, r5 )
+ THUMB( orr r11, r10, r6 ) @ factor way and level into r11
+ THUMB( lsl r6, r7, r2 )
+ THUMB( orr r11, r11, r6 ) @ factor set number into r11
+ mcr p15, 0, r11, c7, c14, 2 @ DCCISW: clean+invalidate by set/way
+ subs r9, r9, #1 @ next way
+ bge _loop3
+ subs r7, r7, #1 @ next set
+ bge _loop2
+_skip:
+ add r10, r10, #2 @ next cache level
+ cmp r3, r10 @ done all levels up to LoC?
+ bgt _loop1
+_finished:
+ ldmfd sp!, {r0-r7, r9-r11}
+ mov r10, #0 @ switch back to cache level 0
+ mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
+_iflush:
+ mcr p15, 0, r10, c7, c10, 4 @ DSB
+ mcr p15, 0, r10, c7, c5, 0 @ invalidate I+BTB
+ mcr p15, 0, r10, c7, c10, 4 @ DSB
+ mcr p15, 0, r10, c7, c5, 4 @ ISB
+ mov pc, lr
+
+.align
+.section ".stack", "aw", %nobits @ zero-filled, writable, occupies no file space
+
+.L_user_stack: .space 4096 @ 4KB stack; decompressor heap sits above sp (see .text)
+.L_user_stack_end: @ initial stack pointer, referenced from LC0
+