[T106][ZXW-22]7520V3SCV2.01.01.02P42U09_VEC_V0.8_AP_VEC origin source commit

Change-Id: Ic6e05d89ecd62fc34f82b23dcf306c93764aec4b
diff --git a/boot/common/src/uboot/compress/head.S b/boot/common/src/uboot/compress/head.S
new file mode 100644
index 0000000..fedf779
--- /dev/null
+++ b/boot/common/src/uboot/compress/head.S
@@ -0,0 +1,446 @@
+/*******************************************************************************
+* Copyright (C) 2016, ZTE Corporation. All rights reserved.
+* 
+* File name:     head.S
+* File ID:       head.S
+* Summary:       kernel image decompression boot code
+* 
+* Date            Version     Tag             Author          Description
+* ------------------------------------------------------------------------------
+* 2016/09/12      V1.0        Create          Deng Ning       Initial version
+* 
+*******************************************************************************/
+
+/*******************************************************************************
+*                                 Header files                                 *
+*******************************************************************************/
+
+/*******************************************************************************
+*                               Macro definitions                              *
+*******************************************************************************/
+#ifdef __thumb2__
+#define ARM(x...)		/* ARM-only lines: dropped when assembling Thumb-2 */
+#define THUMB(x...)	x
+#define W(instr)	instr.w
+#else
+#define ARM(x...)	x
+#define THUMB(x...)		/* Thumb-only lines: dropped when assembling ARM */
+#define W(instr)	instr
+#endif
+#define ARM_BE8(x...)		/* big-endian (BE8) only code: empty in this build */
+
+#define END(name) \
+	.size name, .-name
+
+#define ENDPROC(name) \
+  .type name, %function; \
+  END(name)
+
+#define CYGOPT_HAL_ARM_MMU
+#define CONFIG_CPU_CP15
+
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+#define CB_BITS 0x08		/* section descriptor: C=1, B=0 (write-through) */
+#else
+#define CB_BITS 0x0c		/* section descriptor: C=1, B=1 (write-back) */
+#endif
+
+/*******************************************************************************
+*                         External function declarations                       *
+*******************************************************************************/
+.extern decompress_kernel
+
+/*******************************************************************************
+*                         External variable declarations                       *
+*******************************************************************************/
+.extern image_start
+
+/*******************************************************************************
+*                        Global function implementations                       *
+*******************************************************************************/
+
+.section ".start", #alloc, #execinstr
+.align
+.arm				@ Always enter in ARM state
+.global _start
+.type	_start, function
+
+_start:
+    .rept	8
+    mov	r0, r0			@ 8 NOPs: patchable image header / entry pad
+    .endr
+		
+.text
+    /* move to SVC MODE */
+    mrs     r0, cpsr
+    bic     r0, #0x1f
+    orr     r0, r0, #0xd3                   /* SVC mode, IRQ and FIQ masked */
+    bic     r0, #(1<<8)                     /* unmask Asynchronous abort */
+    msr     cpsr_cxsf, r0
+
+    /* Control Register Setup */
+    mrc     p15, 0, r0, c1, c0, 0
+    bic     r0, r0, #(1<<0)         /* MMU disabled */
+    orr     r0, r0, #(1<<1)         /* Alignment fault checking enabled */
+    bic     r0, r0, #(1<<2)         /* Data Cache disabled */
+    orr     r0, r0, #(1<<11)        /* Branch prediction enabled */
+    bic     r0, r0, #(1<<12)        /* Instruction Cache disabled */
+    bic     r0, r0, #(1<<13)        /* USE VBAR to set the vector base address */
+    DSB                             /* Ensure all previous loads/stores have completed */
+    mcr     p15, 0, r0, c1, c0, 0
+    ISB
+        
+    adr	r0, LC0
+    ldmia   r0, {r1, r2, r3, r6, r10, r11, r12}
+    ldr	sp, [r0, #28]                       @ 8th LC0 word = .L_user_stack_end
+
+    /*
+     * We might be running at a different address.  We need
+     * to fix up various pointers.
+     */
+    sub	r0,  r0,  r1    @ calculate the delta offset
+    add r2,  r2,  r0    @ __bss_start
+    add r3,  r3,  r0    @ __bss_end
+    add	r6,  r6,  r0    @ _edata
+    add	r10, r10, r0    @ inflated kernel size location
+    add r11, r11, r0    @ got_start
+    add r12, r12, r0    @ got_end
+    add	sp,  sp,  r0    @ sp
+
+    /* entry address of the decompressed kernel */
+    ldr r4, =image_start
+    add r4,  r4,  r0    @ fix up the pointer by the same delta
+    ldr r4, [r4]
+
+    /*
+     * The kernel build system appends the size of the
+     * decompressed kernel at the end of the compressed data
+     * in little-endian form.
+     */
+    ldrb	r9, [r10, #0]
+    ldrb	lr, [r10, #1]
+    orr	    r9, r9, lr, lsl #8
+    ldrb	lr, [r10, #2]
+    ldrb	r10, [r10, #3]
+    orr	    r9, r9, lr, lsl #16
+    orr	    r9, r9, r10, lsl #24    @ r9 = inflated kernel size (LE32)
+
+    add     r10, r4, r9             @ r10 = end of the decompressed image
+    adr     r9, _clear_bss
+    cmp	    r10, r9                 @ would the image overwrite this code?
+dead_loop:
+    bgt     dead_loop               @ yes: unrecoverable overlap, hang here
+
+    /*
+     * Relocate all entries in the GOT table.
+     * Bump bss entries to _edata + dtb size
+     */
+    mov r5, #0
+1:		
+    ldr	  r1, [r11, #0]     @ relocate entries in the GOT
+    add	  r1, r1, r0        @ This fixes up C references
+    cmp	  r1, r2            @ if entry >= bss_start &&
+    cmphs r3, r1            @ bss_end > entry
+    addhi r1, r1, r5        @ entry += dtb size
+    str	  r1, [r11], #4     @ next entry
+    cmp	  r11, r12
+    blo	  1b
+
+    /* bump our bss pointers too */
+    add	r2, r2, r5
+    add	r3, r3, r5
+
+    /*
+     * Zero the BSS section
+     */
+    mov     r0, #0
+_clear_bss:
+    str     r0, [r2], #4
+    cmp     r3, r2
+    bhi     _clear_bss
+
+    bl  cache_on
+    /*
+     * decompress kernel
+     */
+    mov	r0, r4
+    mov	r1, sp			    @ malloc space above stack
+    add	r2, sp, #0x10000	@ 64k max
+    bl  decompress_kernel
+    bl  cache_clean_flush
+    bl  cache_off
+
+    /*
+     * enter kernel
+     */
+    mov	r0, #0
+    bx	r4
+
+.align	2
+.type	LC0, #object
+
+LC0:		
+    .word	LC0                 @ r1
+    .word	__bss_start         @ r2
+    .word	__bss_end           @ r3
+    .word	_edata              @ r6
+    .word	input_data_end - 4  @ r10 (inflated size location)
+    .word	_got_start          @ r11
+    .word	_got_end            @ ip    
+    .word	.L_user_stack_end   @ sp
+.size	LC0, . - LC0
+
+__setup_mmu:
+		lsr r3, r4, #0x14		@ align kernel start address (r4)
+		lsl r3, r3, #0x14		@ down to a 1MB boundary
+        sub	r3, r3, #16384		@ Page directory size
+		bic	r3, r3, #0xff		@ Align the pointer
+		bic	r3, r3, #0x3f00		@ (16KB alignment for the L1 table)
+/*
+ * Initialise the page tables, turning on the cacheable and bufferable
+ * bits for the RAM area only.
+ */
+		mov	r0, r3			@ r0 walks the page table
+		mov	r9, r0, lsr #18
+		mov	r9, r9, lsl #18		@ start of RAM
+		add	r10, r9, #0x10000000	@ a reasonable RAM size
+		mov	r1, #0x12		@ XN|U + section mapping
+		orr	r1, r1, #3 << 10	@ AP=11
+		add	r2, r3, #16384		@ r2 = end of the 16KB table
+1:		cmp	r1, r9			@ if virt > start of RAM
+		cmphs	r10, r1			@   && end of RAM > virt
+		bic	r1, r1, #0x1c		@ clear XN|U + C + B
+		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
+		orrhs	r1, r1, r6		@ set RAM section settings
+		str	r1, [r0], #4		@ 1:1 mapping
+		add	r1, r1, #1048576	@ advance to the next 1MB section
+		teq	r0, r2
+		bne	1b
+/*
+ * If ever we are running from Flash, then we surely want the cache
+ * to be enabled also for our execution instance...  We map 2MB of it
+ * so there is no map overlap problem for up to 1 MB compressed kernel.
+ * If the execution is in RAM then we would only be duplicating the above.
+ */
+		orr	r1, r6, #0x04		@ ensure B is set for this
+		orr	r1, r1, #3 << 10	@ AP=11
+		mov	r2, pc
+		mov	r2, r2, lsr #20		@ 1MB section we are executing in
+		orr	r1, r1, r2, lsl #20
+		add	r0, r3, r2, lsl #2	@ table slot for that section
+		str	r1, [r0], #4		@ map the section containing pc
+		add	r1, r1, #1048576
+		str	r1, [r0]		@ and the following 1MB as well
+		mov	pc, lr
+ENDPROC(__setup_mmu)
+
+__armv7_mmu_cache_on:
+		mov	r12, lr			@ save lr: bl __setup_mmu clobbers it
+#ifdef CYGOPT_HAL_ARM_MMU
+		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
+		tst	r11, #0xf		@ VMSA
+		movne	r6, #CB_BITS | 0x02	@ !XN
+		blne	__setup_mmu
+		mov	r0, #0
+		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
+		tst	r11, #0xf		@ VMSA
+		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
+#endif
+		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
+		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
+		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
+		orr	r0, r0, #0x003c		@ write buffer
+		bic	r0, r0, #2		@ A (no unaligned access fault)
+		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
+						@ (needed for ARM1176)
+#ifdef CYGOPT_HAL_ARM_MMU
+ ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
+		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg (ne: VMSA)
+		orrne	r0, r0, #1		@ MMU enabled
+		movne	r1, #0xfffffffd		@ domain 0 = client
+		bic     r6, r6, #1 << 31        @ 32-bit translation system
+		bic     r6, r6, #3 << 0         @ use only ttbr0
+		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
+		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
+		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
+#endif
+		mcr	p15, 0, r0, c7, c5, 4	@ ISB
+		mcr	p15, 0, r0, c1, c0, 0	@ load control register
+		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
+		mov	r0, #0
+		mcr	p15, 0, r0, c7, c5, 4	@ ISB
+		mov	pc, r12
+
+#define PROC_ENTRY_SIZE (4*5)		/* bytes per proc_types entry (5 words) */
+
+/* cache on */
+		.align	5
+cache_on:	mov	r3, #8			@ cache_on function
+		b	call_cache_fn
+
+call_cache_fn:	adr	r12, proc_types		@ r3 = offset of method to call
+#ifdef CONFIG_CPU_CP15
+		mrc	p15, 0, r9, c0, c0	@ get processor ID
+#elif defined(CONFIG_CPU_V7M)
+		/*
+		 * On v7-M the processor id is located in the V7M_SCB_CPUID
+		 * register, but as cache handling is IMPLEMENTATION DEFINED on
+		 * v7-M (if existant at all) we just return early here.
+		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
+		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
+		 * use cp15 registers that are not implemented on v7-M.
+		 */
+		bx	lr
+#else
+		ldr	r9, =CONFIG_PROCESSOR_ID
+#endif
+1:		ldr	r1, [r12, #0]		@ get value
+		ldr	r2, [r12, #4]		@ get mask
+		eor	r1, r1, r9		@ (real ^ match)
+		tst	r1, r2			@       & mask
+ ARM(		addeq	pc, r12, r3		) @ call cache function
+ THUMB(		addeq	r12, r3			)
+ THUMB(		moveq	pc, r12			) @ call cache function
+		add	r12, r12, #PROC_ENTRY_SIZE	@ try the next table entry
+		b	1b
+
+/*
+ * Table for cache operations.  This is basically:
+ *   - CPU ID match
+ *   - CPU ID mask
+ *   - 'cache on' method instruction
+ *   - 'cache off' method instruction
+ *   - 'cache flush' method instruction
+ *
+ * We match an entry using: ((real_id ^ match) & mask) == 0
+ *
+ * Writethrough caches generally only need 'on' and 'off'
+ * methods.  Writeback caches _must_ have the flush method
+ * defined.
+ */
+		.align	2
+		.type	proc_types,#object
+proc_types:
+		.word	0x000f0000		@ new CPU Id
+		.word	0x000f0000
+		W(b)	__armv7_mmu_cache_on
+		W(b)	__armv7_mmu_cache_off
+		W(b)	__armv7_mmu_cache_flush
+
+		.word	0			@ unrecognised type
+		.word	0
+		mov	pc, lr			@ 'on' method: no-op
+ THUMB(		nop				)
+		mov	pc, lr			@ 'off' method: no-op
+ THUMB(		nop				)
+		mov	pc, lr			@ 'flush' method: no-op
+ THUMB(		nop				)
+
+		.size	proc_types, . - proc_types
+
+		/*
+		 * If you get a "non-constant expression in ".if" statement"
+		 * error from the assembler on this line, check that you have
+		 * not accidentally written a "b" instruction where you should
+		 * have written W(b).
+		 */
+		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
+		.error "The size of one or more proc_types entries is wrong."
+		.endif
+
+		.align	5
+cache_off:	mov	r3, #12			@ cache_off function
+		b	call_cache_fn
+
+__armv7_mmu_cache_off:
+		mrc	p15, 0, r0, c1, c0	@ read SCTLR
+#ifdef CYGOPT_HAL_ARM_MMU
+		bic	r0, r0, #0x000d		@ clear M (MMU), C (D-cache), W bits
+#else
+		bic	r0, r0, #0x000c		@ clear C (D-cache) and W bits
+#endif
+		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
+		mov	r12, lr			@ save lr across the flush call
+		bl	__armv7_mmu_cache_flush
+		mov	r0, #0
+#ifdef CYGOPT_HAL_ARM_MMU
+		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
+#endif
+		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
+		mcr	p15, 0, r0, c7, c10, 4	@ DSB
+		mcr	p15, 0, r0, c7, c5, 4	@ ISB
+		mov	pc, r12
+
+		.align	5
+cache_clean_flush:
+		mov	r3, #16			@ cache_flush function
+		b	call_cache_fn
+
+__armv7_mmu_cache_flush:
+		tst	r4, #1			@ NOTE(review): skips D-side flush when
+		bne	_iflush			@ bit 0 of r4 is set - confirm semantics
+		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
+		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
+		mov	r10, #0
+		beq	hierarchical
+		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
+		b	_iflush
+hierarchical:
+		mcr	p15, 0, r10, c7, c10, 5	@ DMB
+		stmfd	sp!, {r0-r7, r9-r11}
+		mrc	p15, 1, r0, c0, c0, 1	@ read CLIDR
+		ands	r3, r0, #0x7000000	@ extract LoC (level of coherency)
+		mov	r3, r3, lsr #23		@ r3 = LoC * 2
+		beq	_finished		@ no caches to clean
+		mov	r10, #0			@ start with cache level 0
+_loop1:
+		add	r2, r10, r10, lsr #1	@ r2 = 3 * current cache level
+		mov	r1, r0, lsr r2		@ shift cache type field down
+		and	r1, r1, #7		@ cache type at this level
+		cmp	r1, #2			@ D or unified cache present?
+		blt	_skip			@ no cache / I-cache only: skip level
+		mcr	p15, 2, r10, c0, c0, 0	@ select level in CSSELR
+		mcr	p15, 0, r10, c7, c5, 4	@ ISB to sync CCSIDR selection
+		mrc	p15, 1, r1, c0, c0, 0	@ read CCSIDR for this level
+		and	r2, r1, #7		@ line length field
+		add	r2, r2, #4		@ set-index shift (log2 line size)
+		ldr	r4, =0x3ff
+		ands	r4, r4, r1, lsr #3	@ r4 = max way number
+		clz	r5, r4			@ way-index shift
+		ldr	r7, =0x7fff
+		ands	r7, r7, r1, lsr #13	@ r7 = max set number
+_loop2:
+		mov	r9, r4			@ working copy of max way number
+_loop3:
+ ARM(		orr	r11, r10, r9, lsl r5	) @ level | way
+ ARM(		orr	r11, r11, r7, lsl r2	) @ | set
+ THUMB(		lsl	r6, r9, r5		)
+ THUMB(		orr	r11, r10, r6		) @ level | way
+ THUMB(		lsl	r6, r7, r2		)
+ THUMB(		orr	r11, r11, r6		) @ | set
+		mcr	p15, 0, r11, c7, c14, 2	@ clean+invalidate by set/way
+		subs	r9, r9, #1		@ next way
+		bge	_loop3
+		subs	r7, r7, #1		@ next set
+		bge	_loop2
+_skip:
+		add	r10, r10, #2		@ next cache level (2 per level)
+		cmp	r3, r10
+		bgt	_loop1
+_finished:
+		ldmfd	sp!, {r0-r7, r9-r11}
+		mov	r10, #0			@ switch back to cache level 0
+		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
+_iflush:
+		mcr	p15, 0, r10, c7, c10, 4	@ DSB
+		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
+		mcr	p15, 0, r10, c7, c10, 4	@ DSB
+		mcr	p15, 0, r10, c7, c5, 4	@ ISB
+		mov	pc, lr
+
+.align
+.section ".stack", "aw", %nobits
+
+.L_user_stack:	.space	4096		@ 4KB decompressor stack
+.L_user_stack_end:			@ initial sp (loaded via LC0)
+