[Feature] Add MT2731_MP2_MR2_SVN388 baseline version
Change-Id: Ief04314834b31e27effab435d3ca8ba33b499059
diff --git a/src/bsp/lk/arch/arm64/start.S b/src/bsp/lk/arch/arm64/start.S
new file mode 100644
index 0000000..f36f5bf
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/start.S
@@ -0,0 +1,440 @@
+#include <asm.h>
+#include <arch/arm64/mmu.h>
+#include <arch/asm_macros.h>
+#include <arch/arch_ops.h>
+#include <kernel/vm.h>
+
+/*
+ * Register use:
+ * x0-x3 Arguments
+ * x9-x15 Scratch
+ * x19-x28 Globals
+ */
+tmp .req x9                     /* short-lived scratch */
+tmp2 .req x10                   /* second scratch */
+wtmp2 .req w10                  /* 32-bit view of tmp2 */
+idx .req x11                    /* index into the current page table */
+idx_shift .req x12              /* shift converting vaddr -> idx at the current table level */
+page_table .req x13             /* base of the page table currently being filled */
+new_page_table .req x14         /* freshly read/allocated next-level table entry */
+phys_offset .req x15            /* virt - phys delta of this image (computed at .Lphys_offset) */
+
+cpuid .req x19                  /* linearised CPU id (callee-saved, live across calls) */
+page_table0 .req x20            /* tt_trampoline identity table for ttbr0 (phys addr) */
+page_table1 .req x21            /* kernel translation table for ttbr1 (phys addr) */
+mmu_initial_mapping .req x22    /* cursor into the mmu_initial_mappings[] array */
+vaddr .req x23                  /* virtual address of the range being mapped */
+paddr .req x24                  /* physical address of the range being mapped */
+mapping_size .req x25           /* bytes remaining in the whole mmu_initial_mappings entry */
+size .req x26                   /* bytes remaining in the current sub-range */
+attr .req x27                   /* MMU attribute bits chosen for the current sub-range */
+
+.section .text.boot
+FUNCTION(_start)
+.globl arm_reset
+arm_reset:
+
+ bl setup_el2_or_el3_exception_base /* install exception vectors for the EL we booted in -- presumably EL2 or EL3; defined elsewhere */
+
+ mrs tmp, CurrentEL /* CurrentEL.EL field lives in bits [3:2] */
+ cmp tmp, #(0b11 << 2) /* are we in EL3? */
+ b.ne .Lsetup_el2_or_el3_stack
+
+ /* EL3 only: program the generic timer frequency (13 MHz) */
+ ldr tmp2, =13000000
+ msr cntfrq_el0, tmp2
+
+ /* EL3 only: set bit 6 of s3_1_c15_c2_1 -- presumably CPUECTLR_EL1.SMPEN (coherency enable); confirm for this core */
+ mrs tmp2, s3_1_c15_c2_1
+ orr tmp2, tmp2, #(1<<6)
+ msr s3_1_c15_c2_1, tmp2
+
+.Lsetup_el2_or_el3_stack:
+ /* set el2 or el3 stack pointer so calls below have a valid stack before dropping to EL1 */
+ ldr tmp2, = __stack_end
+ mov sp, tmp2
+
+ /* initialization required in EL3 only; weak symbol at asm.S. tmp still holds CurrentEL from above */
+ cmp tmp, #(0b11 << 2)
+ b.ne .LelX_to_el1
+ bl platform_el3_init
+
+.LelX_to_el1:
+ /* drop from EL2/EL3 to EL1; everything past this point runs in EL1 */
+ bl arm64_elX_to_el1
+
+#if WITH_KERNEL_VM
+ /* enable caches so atomics and spinlocks work */
+ mrs tmp, sctlr_el1
+ orr tmp, tmp, #(1<<12) /* Enable icache */
+ orr tmp, tmp, #(1<<2) /* Enable dcache/ucache */
+ bic tmp, tmp, #(1<<3) /* Disable Stack Alignment Check */ /* TODO: don't use unaligned stacks */
+ msr sctlr_el1, tmp
+
+ /* set up the mmu according to mmu_initial_mappings */
+
+ /* load the base of the kernel (ttbr1) translation table; adrp + :lo12: yields its physical address while the MMU is off */
+ adrp page_table1, arm64_kernel_translation_table
+ add page_table1, page_table1, #:lo12:arm64_kernel_translation_table
+
+ /* Prepare tt_trampoline page table */
+ /* Calculate pagetable physical addresses */
+ adrp page_table0, tt_trampoline
+ add page_table0, page_table0, #:lo12:tt_trampoline
+
+#if WITH_SMP
+ mrs cpuid, mpidr_el1 /* affinity fields identify this CPU */
+ ubfx cpuid, cpuid, #0, #SMP_CPU_ID_BITS
+ cbnz cpuid, .Lmmu_enable_secondary /* only CPU 0 builds the tables; others wait */
+#endif
+
+ mov tmp, #0 /* tmp = table entry index */
+
+ /* walk through all the entries in the translation table, setting them up */
+.Lclear_top_page_table_loop:
+ str xzr, [page_table1, tmp, lsl #3] /* descriptors are 8 bytes, hence lsl #3 */
+ add tmp, tmp, #1
+ cmp tmp, #MMU_KERNEL_PAGE_TABLE_ENTRIES_TOP
+ bne .Lclear_top_page_table_loop
+
+ /* load the address of the mmu_initial_mappings table and start processing */
+ adrp mmu_initial_mapping, mmu_initial_mappings
+ add mmu_initial_mapping, mmu_initial_mapping, #:lo12:mmu_initial_mappings
+
+.Linitial_mapping_loop:
+/* Read entry of mmu_initial_mappings (likely defined in platform.c) */
+ ldp paddr, vaddr, [mmu_initial_mapping, #__MMU_INITIAL_MAPPING_PHYS_OFFSET]
+ ldp size, tmp, [mmu_initial_mapping, #__MMU_INITIAL_MAPPING_SIZE_OFFSET]
+
+ tbzmask tmp, MMU_INITIAL_MAPPING_FLAG_DYNAMIC, .Lnot_dynamic /* tbzmask (asm_macros.h): branch if flag bit clear */
+ adr paddr, _start /* dynamic entry: phys base = wherever this image is actually running */
+ mov size, x0 /* x0: size argument from the loader -- assumes the earlier bl calls preserved it; TODO confirm */
+ str paddr, [mmu_initial_mapping, #__MMU_INITIAL_MAPPING_PHYS_OFFSET]
+ str size, [mmu_initial_mapping, #__MMU_INITIAL_MAPPING_SIZE_OFFSET]
+
+.Lnot_dynamic:
+ /* if size == 0, end of list, done with initial mapping */
+ cbz size, .Linitial_mapping_done
+ mov mapping_size, size
+
+ /* set up the flags */
+ tbzmask tmp, MMU_INITIAL_MAPPING_FLAG_UNCACHED, .Lnot_uncached
+ ldr attr, =MMU_INITIAL_MAP_STRONGLY_ORDERED
+ b .Lmem_type_done
+
+.Lnot_uncached:
+ /* is this memory mapped to device/peripherals? */
+ tbzmask tmp, MMU_INITIAL_MAPPING_FLAG_DEVICE, .Lnot_device
+ ldr attr, =MMU_INITIAL_MAP_DEVICE
+ b .Lmem_type_done
+.Lnot_device:
+
+/* Determine the segment in which the memory resides and set appropriate
+ * attributes. In order to handle offset kernels, the following rules are
+ * implemented below:
+ * KERNEL_BASE to __code_start -read/write (see note below)
+ * __code_start to __rodata_start (.text) -read only
+ * __rodata_start to __data_start (.rodata) -read only, execute never
+ * __data_start to ..... (.data) -read/write
+ *
+ * The space below __code_start is presently left as read/write (same as .data)
+ * mainly as a workaround for the raspberry pi boot process. Boot vectors for
+ * secondary CPUs are in this area and need to be updated by cpu0 once the system
+ * is ready to boot the secondary processors.
+ * TODO: handle this via mmu_initial_mapping entries, which may need to be
+ * extended with additional flag types
+ */
+.Lmapping_size_loop:
+ ldr attr, =MMU_PTE_KERNEL_DATA_FLAGS
+ ldr tmp, =__code_start
+ subs size, tmp, vaddr /* size = __code_start - vaddr */
+ /* If page is below the entry point (_start) mark as kernel data */
+ b.hi .Lmem_type_done
+
+ ldr attr, =MMU_PTE_KERNEL_RO_FLAGS
+ ldr tmp, =__rodata_start
+ subs size, tmp, vaddr /* vaddr inside .text? map read-only */
+ b.hi .Lmem_type_done
+ orr attr, attr, #MMU_PTE_ATTR_PXN /* .rodata: read-only plus privileged-execute-never */
+ ldr tmp, =__data_start
+ subs size, tmp, vaddr
+ b.hi .Lmem_type_done
+ ldr attr, =MMU_PTE_KERNEL_DATA_FLAGS /* .data/.bss and the rest of RAM: read/write */
+ ldr tmp, =_end
+ subs size, tmp, vaddr
+ b.lo . /* Error: _end < vaddr */
+ cmp mapping_size, size
+ b.lo . /* Error: mapping_size < size => RAM size too small for data/bss */
+ mov size, mapping_size /* final sub-range: consume the remainder of the entry */
+
+.Lmem_type_done:
+ subs mapping_size, mapping_size, size
+ b.lo . /* Error: mapping_size < size (RAM size too small for code/rodata?) */
+
+ /* Check that paddr, vaddr and size are page aligned */
+ orr tmp, vaddr, paddr
+ orr tmp, tmp, size
+ tst tmp, #(1 << MMU_KERNEL_PAGE_SIZE_SHIFT) - 1
+ bne . /* Error: not page aligned */
+
+ /* Clear top bits of virtual address (should be all set) */
+ eor vaddr, vaddr, #(~0 << MMU_KERNEL_SIZE_SHIFT)
+
+ /* Check that top bits were all set */
+ tst vaddr, #(~0 << MMU_KERNEL_SIZE_SHIFT)
+ bne . /* Error: vaddr out of range */
+
+.Lmap_range_top_loop:
+ /* Select top level page table */
+ mov page_table, page_table1
+ mov idx_shift, #MMU_KERNEL_TOP_SHIFT
+
+ lsr idx, vaddr, idx_shift
+
+
+/* determine the type of page table entry to use given alignment and size
+ * of the chunk of memory we are mapping
+ */
+.Lmap_range_one_table_loop:
+ /* Check if current level allow block descriptors */
+ cmp idx_shift, #MMU_PTE_DESCRIPTOR_BLOCK_MAX_SHIFT
+ b.hi .Lmap_range_need_page_table
+
+ /* Check if paddr and vaddr alignment allows a block descriptor */
+ orr tmp2, vaddr, paddr
+ lsr tmp, tmp2, idx_shift
+ lsl tmp, tmp, idx_shift /* round combined bits down to this level's block size */
+ cmp tmp, tmp2
+ b.ne .Lmap_range_need_page_table
+
+ /* Check if size is large enough for a block mapping */
+ lsr tmp, size, idx_shift
+ cbz tmp, .Lmap_range_need_page_table
+
+ /* Select descriptor type, page for level 3, block for level 0-2 */
+ orr tmp, attr, #MMU_PTE_L3_DESCRIPTOR_PAGE
+ cmp idx_shift, MMU_KERNEL_PAGE_SIZE_SHIFT
+ beq .Lmap_range_l3
+ orr tmp, attr, #MMU_PTE_L012_DESCRIPTOR_BLOCK
+.Lmap_range_l3:
+
+ /* Write page table entry */
+ orr tmp, tmp, paddr
+ str tmp, [page_table, idx, lsl #3]
+
+ /* Move to next page table entry */
+ mov tmp, #1
+ lsl tmp, tmp, idx_shift /* tmp = bytes covered by one entry at this level */
+ add vaddr, vaddr, tmp
+ add paddr, paddr, tmp
+ subs size, size, tmp
+ /* TODO: add local loop if next entry is in the same page table */
+ b.ne .Lmap_range_top_loop /* size != 0: restart walk from the top level */
+
+ /* Restore top bits of virtual address (should be all set) */
+ eor vaddr, vaddr, #(~0 << MMU_KERNEL_SIZE_SHIFT)
+ /* Move to next subtype of ram mmu_initial_mappings entry */
+ cbnz mapping_size, .Lmapping_size_loop
+
+ /* Move to next mmu_initial_mappings entry */
+ add mmu_initial_mapping, mmu_initial_mapping, __MMU_INITIAL_MAPPING_SIZE
+ b .Linitial_mapping_loop
+
+.Lmap_range_need_page_table:
+ /* Check if page table entry is unused */
+ ldr new_page_table, [page_table, idx, lsl #3]
+ cbnz new_page_table, .Lmap_range_has_page_table
+
+ /* Calculate phys offset (virt - phys while the MMU is still off; needed for memory allocation) */
+.Lphys_offset:
+ adr phys_offset, .Lphys_offset /* phys */
+ ldr tmp, =.Lphys_offset /* virt */
+ sub phys_offset, tmp, phys_offset
+
+ /* Allocate new page table */
+ calloc_bootmem_aligned new_page_table, tmp, tmp2, MMU_KERNEL_PAGE_SIZE_SHIFT, phys_offset
+
+ /* Write page table entry (with allocated page table) */
+ orr new_page_table, new_page_table, #MMU_PTE_L012_DESCRIPTOR_TABLE
+ str new_page_table, [page_table, idx, lsl #3]
+
+.Lmap_range_has_page_table:
+ /* Check descriptor type */
+ and tmp, new_page_table, #MMU_PTE_DESCRIPTOR_MASK
+ cmp tmp, #MMU_PTE_L012_DESCRIPTOR_TABLE
+ b.ne . /* Error: entry already in use (as a block entry) */
+
+ /* switch to next page table level */
+ bic page_table, new_page_table, #MMU_PTE_DESCRIPTOR_MASK
+ mov tmp, #~0
+ lsl tmp, tmp, idx_shift
+ bic tmp, vaddr, tmp /* tmp = vaddr bits below the current level */
+ sub idx_shift, idx_shift, #(MMU_KERNEL_PAGE_SIZE_SHIFT - 3) /* each level resolves (page shift - 3) bits */
+ lsr idx, tmp, idx_shift
+
+ b .Lmap_range_one_table_loop
+
+.Linitial_mapping_done:
+
+ /* Prepare tt_trampoline page table */
+
+ /* Zero tt_trampoline translation tables */
+ mov tmp, #0
+.Lclear_tt_trampoline:
+ str xzr, [page_table0, tmp, lsl#3]
+ add tmp, tmp, #1
+ cmp tmp, #MMU_PAGE_TABLE_ENTRIES_IDENT
+ blt .Lclear_tt_trampoline
+
+ /* Setup mapping at phys -> phys */
+ adr tmp, .Lmmu_on_pc /* identity-map the code we will be executing when the MMU turns on */
+ lsr tmp, tmp, #MMU_IDENT_TOP_SHIFT /* tmp = paddr index */
+ ldr tmp2, =MMU_PTE_IDENT_FLAGS
+ add tmp2, tmp2, tmp, lsl #MMU_IDENT_TOP_SHIFT /* tmp2 = pt entry */
+
+ str tmp2, [page_table0, tmp, lsl #3] /* tt_trampoline[paddr index] = pt entry */
+
+#if WITH_SMP
+ adrp tmp, page_tables_not_ready
+ add tmp, tmp, #:lo12:page_tables_not_ready
+ str wzr, [tmp] /* CPU 0: publish "page tables ready" to the waiting secondaries */
+ b .Lpage_tables_ready
+
+.Lmmu_enable_secondary:
+ adrp tmp, page_tables_not_ready
+ add tmp, tmp, #:lo12:page_tables_not_ready
+.Lpage_tables_not_ready:
+ ldr wtmp2, [tmp] /* secondaries spin here until CPU 0 clears the flag */
+ cbnz wtmp2, .Lpage_tables_not_ready
+.Lpage_tables_ready:
+#endif
+
+ /* set up the mmu */
+
+ /* Invalidate TLB */
+ tlbi vmalle1is
+ isb
+ dsb sy
+
+ /* Initialize Memory Attribute Indirection Register */
+ ldr tmp, =MMU_MAIR_VAL
+ msr mair_el1, tmp
+
+ /* Initialize TCR_EL1 */
+ /* set cacheable attributes on translation walk */
+ /* (SMP extensions) non-shareable, inner write-back write-allocate */
+ ldr tmp, =MMU_TCR_FLAGS_IDENT /* identity variant first; swapped for the kernel variant after the jump to vaddr below */
+ msr tcr_el1, tmp
+
+ isb
+
+ /* Write ttbr with phys addr of the translation table */
+ msr ttbr0_el1, page_table0 /* ttbr0 = identity trampoline covering the current PC */
+ msr ttbr1_el1, page_table1 /* ttbr1 = kernel mappings built above */
+ isb
+
+ /* Read SCTLR */
+ mrs tmp, sctlr_el1
+
+ /* Turn on the MMU */
+ orr tmp, tmp, #0x1 /* SCTLR_EL1.M */
+
+ /* Write back SCTLR */
+ msr sctlr_el1, tmp
+.Lmmu_on_pc:
+ isb
+
+ /* Jump to virtual code address */
+ ldr tmp, =.Lmmu_on_vaddr
+ br tmp
+
+.Lmmu_on_vaddr:
+
+ /* Disable trampoline page-table in ttbr0 */
+ ldr tmp, =MMU_TCR_FLAGS_KERNEL
+ msr tcr_el1, tmp
+ isb
+
+
+ /* Invalidate TLB */
+ tlbi vmalle1
+ isb
+
+#if WITH_SMP
+ cbnz cpuid, .Lsecondary_boot /* secondaries skip bss clearing and lk_main */
+#endif
+#endif /* WITH_KERNEL_VM */
+
+ ldr tmp, =__stack_end
+ mov sp, tmp /* switch to the (now virtual) boot stack */
+
+ /* clear bss */
+.L__do_bss:
+ /* clear out the bss excluding the stack and kernel translation table */
+ /* NOTE: relies on __post_prebss_bss_start and __bss_end being 8 byte aligned */
+ ldr tmp, =__post_prebss_bss_start
+ ldr tmp2, =__bss_end
+ sub tmp2, tmp2, tmp /* tmp2 = number of bss bytes to zero */
+ cbz tmp2, .L__bss_loop_done
+.L__bss_loop:
+ sub tmp2, tmp2, #8
+ str xzr, [tmp], #8 /* zero 8 bytes, post-increment the cursor */
+ cbnz tmp2, .L__bss_loop
+.L__bss_loop_done:
+
+ bl lk_main
+ b . /* lk_main is not expected to return; park here if it does */
+
+#if WITH_SMP
+.Lsecondary_boot:
+ and tmp, cpuid, #0xff /* tmp = cpu number within the cluster (affinity level 0) */
+ cmp tmp, #(1 << SMP_CPU_CLUSTER_SHIFT)
+ bge .Lunsupported_cpu_trap /* more cpus per cluster than we support */
+ bic cpuid, cpuid, #0xff
+ orr cpuid, tmp, cpuid, LSR #(8 - SMP_CPU_CLUSTER_SHIFT) /* pack cluster and cpu into one dense linear id */
+ adrp tmp, linear_cpuid_map
+ add tmp, tmp, #:lo12:linear_cpuid_map
+ ldr tmp, [tmp]
+ cbz tmp, .Lno_cpuid_remap /* NULL map pointer: use the packed id as-is */
+ add tmp, tmp, cpuid
+ ldrb wtmp2, [tmp] /* byte load zero-extends into the full tmp2 (x10) */
+ ubfx cpuid, tmp2, #0, #31 /* cpuid = remapped id (tmp2 upper bits already zero) */
+
+.Lno_cpuid_remap:
+ cmp cpuid, #SMP_MAX_CPUS
+ bge .Lunsupported_cpu_trap
+
+ /* Set up the stack: cpu N uses the Nth slot down from __stack_end */
+ ldr tmp, =__stack_end
+ mov tmp2, #ARCH_DEFAULT_STACK_SIZE
+ mul tmp2, tmp2, cpuid
+ sub sp, tmp, tmp2
+
+ mov x0, cpuid /* first argument: this cpu's linear id */
+ bl arm64_secondary_entry
+
+.Lunsupported_cpu_trap:
+ wfe
+ b .Lunsupported_cpu_trap /* park unsupported cpus forever */
+#endif
+
+.ltorg
+
+#if WITH_SMP
+.data
+DATA(page_tables_not_ready)
+ .long 1 /* cleared by CPU 0 once the kernel page tables are built; secondaries spin on it */
+#endif
+
+.section .bss.prebss.stack
+ .align 4 /* 2^4 = 16-byte stack alignment, as AArch64 sp accesses require */
+DATA(__stack)
+ .skip ARCH_DEFAULT_STACK_SIZE * SMP_MAX_CPUS /* one boot stack slot per CPU */
+DATA(__stack_end)
+
+#if WITH_KERNEL_VM
+.section ".bss.prebss.translation_table"
+.align 3 + MMU_PAGE_TABLE_ENTRIES_IDENT_SHIFT /* naturally align the identity table to its total size */
+DATA(tt_trampoline)
+ .skip 8 * MMU_PAGE_TABLE_ENTRIES_IDENT /* 8 bytes per descriptor */
+#endif