[Feature] add MT2731_MP2_MR2_SVN388 baseline version

Change-Id: Ief04314834b31e27effab435d3ca8ba33b499059
diff --git a/src/bsp/lk/arch/arm64/arch.c b/src/bsp/lk/arch/arm64/arch.c
new file mode 100644
index 0000000..c21c4ea
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/arch.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2014 Travis Geiselbrecht
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <compiler.h>
+#include <debug.h>
+#include <arch.h>
+#include <arch/ops.h>
+#include <arch/arm64.h>
+#include <arch/arm64/mmu.h>
+#include <arch/mp.h>
+#include <kernel/thread.h>
+#if WITH_KERNEL_VM
+#include <kernel/vm.h>
+#endif
+#include <lk/init.h>
+#include <lk/main.h>
+#include <platform.h>
+#include <target.h>
+#include <trace.h>
+
+#define LOCAL_TRACE 0
+
+#if WITH_SMP
+/* smp boot lock */
+static spin_lock_t arm_boot_cpu_lock = 1;
+static volatile int secondaries_to_init = 0;
+__WEAK const uint8_t *linear_cpuid_map = NULL;
+#endif
+
+static void arm64_cpu_early_init(void)
+{
+    /* set the vector base */
+    ARM64_WRITE_SYSREG(VBAR_EL1, (uint64_t)&arm64_exception_base);
+
+    /* switch to EL1 */
+    unsigned int current_el = ARM64_READ_SYSREG(CURRENTEL) >> 2;
+    if (current_el > 1) {
+        arm64_elX_to_el1();
+    }
+
+    arch_enable_fiqs();
+}
+
+void arch_early_init(void)
+{
+    arm64_cpu_early_init();
+    platform_init_mmu_mappings();
+}
+
+void arch_init(void)
+{
+#if WITH_SMP
+    arch_mp_init_percpu();
+
+    LTRACEF("midr_el1 0x%llx\n", ARM64_READ_SYSREG(midr_el1));
+
+    secondaries_to_init = SMP_MAX_CPUS - 1; /* TODO: get count from somewhere else, or add cpus as they boot */
+
+    lk_init_secondary_cpus(secondaries_to_init);
+
+    LTRACEF("releasing %d secondary cpus\n", secondaries_to_init);
+
+    /* release the secondary cpus */
+    spin_unlock(&arm_boot_cpu_lock);
+
+    /* flush the release of the lock, since the secondary cpus are running without cache on */
+    arch_clean_cache_range((addr_t)&arm_boot_cpu_lock, sizeof(arm_boot_cpu_lock));
+#endif
+}
+
+void arch_quiesce(void)
+{
+}
+
+void arch_idle(void)
+{
+    __asm__ volatile("wfi");
+}
+
+void arch_chain_load(void *entry, ulong arg0, ulong arg1, ulong arg2, ulong arg3)
+{
+    LTRACEF("entry %p, args 0x%lx 0x%lx 0x%lx 0x%lx\n", entry, arg0, arg1, arg2, arg3);
+
+    arch_disable_ints();
+
+    /* give target and platform a chance to put hardware into a suitable
+     * state for chain loading.
+     */
+    target_quiesce();
+    platform_quiesce();
+
+    paddr_t entry_pa;
+    paddr_t loader_pa;
+
+#if WITH_KERNEL_VM
+    entry_pa = kvaddr_to_paddr(entry);
+    if (entry_pa == (paddr_t)NULL) {
+        panic("error translating entry physical address\n");
+    }
+
+    LTRACEF("entry pa 0x%lx\n", entry_pa);
+
+    loader_pa = kvaddr_to_paddr((void *)&arm64_chain_load);
+    if (loader_pa == (paddr_t)NULL) {
+        panic("error translating loader physical address\n");
+    }
+
+    LTRACEF("loader pa 0x%lx\n", loader_pa);
+
+    /* TTBR0_EL1 already contains the physical address mapping */
+    ARM64_WRITE_SYSREG(tcr_el1, (uint64_t)MMU_TCR_FLAGS_IDENT);
+#else
+    entry_pa = (paddr_t)entry;
+    loader_pa = (paddr_t)&arm64_chain_load;
+#endif
+
+    LTRACEF("disabling instruction/data cache\n");
+    arch_disable_cache(UCACHE);
+
+    /* put the booting cpu back into close to a default state */
+    arch_quiesce();
+
+    LTRACEF("branching to physical address of loader\n");
+
+    /* branch to the physical address version of the chain loader routine */
+    void (*loader)(paddr_t entry, ulong, ulong, ulong, ulong) __NO_RETURN = (void *)loader_pa;
+    loader(entry_pa, arg0, arg1, arg2, arg3);
+}
+
+#if WITH_SMP
+void arm64_secondary_entry(ulong asm_cpu_num)
+{
+    uint cpu = arch_curr_cpu_num();
+    if (cpu != asm_cpu_num)
+        return;
+
+    arm64_cpu_early_init();
+
+    spin_lock(&arm_boot_cpu_lock);
+    spin_unlock(&arm_boot_cpu_lock);
+
+    /* run early secondary cpu init routines up to the threading level */
+    lk_init_level(LK_INIT_FLAG_SECONDARY_CPUS, LK_INIT_LEVEL_EARLIEST, LK_INIT_LEVEL_THREADING - 1);
+
+    arch_mp_init_percpu();
+
+    LTRACEF("cpu num %d\n", cpu);
+
+    /* we're done, tell the main cpu we're up */
+    atomic_add(&secondaries_to_init, -1);
+    __asm__ volatile("sev");
+
+    lk_secondary_cpu_entry();
+}
+#endif
+
diff --git a/src/bsp/lk/arch/arm64/asm.S b/src/bsp/lk/arch/arm64/asm.S
new file mode 100644
index 0000000..3adc0ee
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/asm.S
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2014 Travis Geiselbrecht
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <asm.h>
+#include <arch/asm_macros.h>
+
+/* use x9 ~ x15 as scratch registers */
+tmp     .req x9
+
+/* void arm64_context_switch(vaddr_t *old_sp, vaddr_t new_sp); */
+FUNCTION(arm64_context_switch)
+    /* save old frame */
+    push x28, x29
+    push x26, x27
+    push x24, x25
+    push x22, x23
+    push x20, x21
+    push x18, x19
+    str  x30, [sp,#-16]!
+
+    /* save old sp */
+    mov  x15, sp
+    str  x15, [x0]
+
+    /* load new sp */
+    mov  sp, x1
+
+    /* restore new frame */
+    ldr  x30, [sp], #16
+    pop  x18, x19
+    pop  x20, x21
+    pop  x22, x23
+    pop  x24, x25
+    pop  x26, x27
+    pop  x28, x29
+
+    ret
+
+FUNCTION(arm64_chain_load)
+    /* shuffle the args around */
+    mov x5, x0
+    mov x0, x1
+    mov x1, x2
+    mov x2, x3
+    mov x3, x4
+    mov x4, x5
+
+#if WITH_KERNEL_VM
+    /* disable MMU */
+    mrs x5, sctlr_el1
+    bic x5, x5, #0x1
+    msr sctlr_el1, x5
+    isb
+#endif
+
+    tlbi vmalle1
+    br  x4
+
+FUNCTION(arm64_elX_to_el1)
+    mrs tmp, CurrentEL
+
+    cmp tmp, #(0b01 << 2)
+    bne .notEL1
+    /* Already in EL1 */
+    ret
+
+.notEL1:
+    cmp tmp, #(0b10 << 2)
+    beq .inEL2
+
+
+    /* set EL2 to 64bit */
+    mrs tmp, scr_el3
+    orr tmp, tmp, #(1<<10)
+    msr scr_el3, tmp
+
+
+    adr tmp, .Ltarget
+    msr elr_el3, tmp
+
+    mov tmp, #((0b1111 << 6) | (0b0101)) /* EL1h runlevel */
+    msr spsr_el3, tmp
+    b   .confEL1
+
+.inEL2:
+    adr tmp, .Ltarget
+    msr elr_el2, tmp
+    mov tmp, #((0b1111 << 6) | (0b0101)) /* EL1h runlevel */
+    msr spsr_el2, tmp
+
+
+
+.confEL1:
+    /* disable EL2 coprocessor traps */
+    mov tmp, #0x33ff
+    msr cptr_el2, tmp
+
+    /* set EL1 to 64bit */
+    mov tmp, #(1<<31)
+    msr hcr_el2, tmp
+
+    /* disable EL1 FPU traps */
+    mov tmp, #(0b11<<20)
+    msr cpacr_el1, tmp
+
+    /* set up the EL1 bounce interrupt */
+    mov tmp, sp
+    msr sp_el1, tmp
+
+    isb
+    eret
+
+
+.Ltarget:
+    ret
diff --git a/src/bsp/lk/arch/arm64/cache-ops.S b/src/bsp/lk/arch/arm64/cache-ops.S
new file mode 100644
index 0000000..04ffef8
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/cache-ops.S
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2014, Google Inc. All rights reserved
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <asm.h>
+#include <arch/ops.h>
+#include <arch/defines.h>
+
+#define LOC_SHIFT           24
+#define CLIDR_FIELD_WIDTH   3
+#define LEVEL_SHIFT         1
+#define DCISW               0x0
+#define DCCISW              0x1
+
+.text
+
+.macro cache_range_op, cache op
+    add     x2, x0, x1                  // calculate the end address
+    bic     x3, x0, #(CACHE_LINE-1)     // align the start with a cache line
+.Lcache_range_op_loop\@:
+    \cache  \op, x3
+    add     x3, x3, #CACHE_LINE
+    cmp     x3, x2
+    blo     .Lcache_range_op_loop\@
+    dsb     sy
+.endm
+
+    /* void arch_clean_cache_range(addr_t start, size_t len); */
+FUNCTION(arch_clean_cache_range)
+    cache_range_op dc cvac         // clean cache to PoC by MVA
+    ret
+
+    /* void arch_clean_invalidate_cache_range(addr_t start, size_t len); */
+FUNCTION(arch_clean_invalidate_cache_range)
+    cache_range_op dc civac        // clean & invalidate dcache to PoC by MVA
+    ret
+
+    /* void arch_invalidate_cache_range(addr_t start, size_t len); */
+FUNCTION(arch_invalidate_cache_range)
+    cache_range_op dc ivac         // invalidate dcache to PoC by MVA
+    ret
+
+    /* void arch_sync_cache_range(addr_t start, size_t len); */
+FUNCTION(arch_sync_cache_range)
+    cache_range_op dc cvau         // clean dcache to PoU by MVA
+    cache_range_op ic ivau         // invalidate icache to PoU by MVA
+    ret
+
+/* will trash x0-x2, x4-x9, x11, x14, x16-x17 */
+LOCAL_FUNCTION(do_dcsw_op)
+        cbz     x3, exit
+        adr     x14, dcsw_loop_table    // compute inner loop address
+        add     x14, x14, x0, lsl #5    // inner loop is 8x32-bit instructions
+        mov     x0, x9
+        mov     w8, #1
+loop1:
+        add     x2, x10, x10, lsr #1    // work out 3x current cache level
+        lsr     x1, x0, x2              // extract cache type bits from clidr
+        and     x1, x1, #7              // mask the bits for current cache only
+        cmp     x1, #2                  // see what cache we have at this level
+        b.lt    level_done              // nothing to do if no cache or icache
+
+        msr     csselr_el1, x10         // select current cache level in csselr
+        isb                             // isb to sync the new csselr & ccsidr
+        mrs     x1, ccsidr_el1          // read the new ccsidr
+        and     x2, x1, #7              // extract the length of the cache lines
+        add     x2, x2, #4              // add 4 (line length offset)
+        ubfx    x4, x1, #3, #10         // maximum way number
+        clz     w5, w4                  // bit position of way size increment
+        lsl     w9, w4, w5              // w9 = aligned max way number
+        lsl     w16, w8, w5             // w16 = way number loop decrement
+        orr     w9, w10, w9             // w9 = combine way and cache number
+        ubfx    w6, w1, #13, #15        // w6 = max set number
+        lsl     w17, w8, w2             // w17 = set number loop decrement
+        dsb     sy                      // barrier before we start this level
+        br      x14                     // jump to DC operation specific loop
+
+        .macro  dcsw_loop _op
+loop2_\_op:
+        lsl     w7, w6, w2              // w7 = aligned max set number
+
+loop3_\_op:
+        orr     w11, w9, w7             // combine cache, way and set number
+        dc      \_op, x11
+        subs    w7, w7, w17             // decrement set number
+        b.ge    loop3_\_op
+
+        subs    x9, x9, x16             // decrement way number
+        b.ge    loop2_\_op
+
+        b       level_done
+.endm
+
+level_done:
+        add     x10, x10, #2            // increment cache number
+        cmp     x3, x10
+        b.gt    loop1
+        msr     csselr_el1, xzr         // select cache level 0 in csselr
+        dsb     sy                      // barrier to complete final cache operation
+        isb
+exit:
+        ret
+
+dcsw_loop_table:
+        dcsw_loop isw
+        dcsw_loop cisw
+        dcsw_loop csw
+
+/* will trash x3, x9, x10 */
+.macro  dcsw_op shift, fw, ls
+        mrs     x9, clidr_el1
+        ubfx    x3, x9, \shift, \fw
+        lsl     x3, x3, \ls
+        mov     x10, xzr
+        bl      do_dcsw_op
+.endm
+
+/* void arch_enable_cache(uint flags);
+ * For EL1  only.
+ */
+FUNCTION(arch_enable_cache)
+    stp     x29, x30, [sp, #-32]!
+    stp     x24, x25, [sp, #16]
+
+    mov     x25, x0
+    /* check DCACHE flag */
+    tst     x25, #DCACHE
+    b.eq    .L__enable_icache
+    mrs     x24, sctlr_el1
+    tst     x24, #(1<<2)
+    b.ne    .L__enable_icache
+
+    /* invalidate dcache */
+    mov     x0, #DCISW
+    dcsw_op #LOC_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT
+
+    /* enable dcache enable bit */
+    orr     x24, x24, #(1<<2)
+    msr     sctlr_el1, x24
+
+.L__enable_icache:
+    /* check ICACHE flag */
+    tst     x25, #ICACHE
+    b.eq    .L__done_enable
+    mrs     x24, sctlr_el1
+    tst     x24, #(1<<12)
+    b.ne    .L__done_enable
+
+    /* invalidate icache */
+    dsb     sy
+    ic      iallu
+    dsb     sy
+    isb
+
+    /* enable icache enable bit */
+    mrs     x24, sctlr_el1
+    orr     x24, x24, #(1<<12)
+    msr     sctlr_el1, x24
+
+.L__done_enable:
+    ldp     x24, x25, [sp, #16]
+    ldp     x29, x30, [sp], #32
+    ret
+
+/* void arch_disable_cache(uint flags) */
+/* only for el1 here */
+FUNCTION(arch_disable_cache)
+    stp     x29, x30, [sp, #-32]!
+    str     x25, [sp, #16]
+
+    mov     x25, x0
+    /* check DCACHE flag */
+    tst     x25, #DCACHE
+    b.eq    .L__disable_icache
+    mrs     x1, sctlr_el1
+    tst     x1, #(1<<2)
+    b.eq    .L__dcache_already_disabled
+
+    /* disable dcache enable bit */
+    bic     x1, x1, #(1<<2)
+    msr     sctlr_el1, x1
+
+    /* clean & invalidate dcache */
+    mov     x0, #DCCISW
+    b       .L__flush_dcache
+
+.L__dcache_already_disabled:
+    /* invalidate dcache */
+    mov     x0, #DCISW
+.L__flush_dcache:
+    dcsw_op #LOC_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT
+
+.L__disable_icache:
+    /* check ICACHE flag */
+    tst     x25, #ICACHE
+    b.eq    .L__done_disable
+    /* disable icache enable bit */
+    mrs     x1, sctlr_el1
+    bic     x1, x1, #(1<<12)
+    msr     sctlr_el1, x1
+
+    /* invalidate icache for PE to PoU */
+    dsb     sy
+    ic      iallu
+    dsb     sy
+    isb
+
+.L__done_disable:
+    ldr     x25, [sp, #16]
+    ldp     x29, x30, [sp], #32
+    ret
diff --git a/src/bsp/lk/arch/arm64/exceptions.S b/src/bsp/lk/arch/arm64/exceptions.S
new file mode 100644
index 0000000..331de7f
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/exceptions.S
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2014 Travis Geiselbrecht
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <asm.h>
+#include <arch/asm_macros.h>
+
+.section .text.boot.vectab
+.align 12
+
+#define lr x30
+#define regsave_long_offset 0xf0
+#define regsave_short_offset 0x90
+
+.macro regsave_long
+sub  sp, sp, #32
+push x28, x29
+push x26, x27
+push x24, x25
+push x22, x23
+push x20, x21
+push x18, x19
+push x16, x17
+push x14, x15
+push x12, x13
+push x10, x11
+push x8, x9
+push x6, x7
+push x4, x5
+push x2, x3
+push x0, x1
+add  x0, sp, #regsave_long_offset
+mrs  x1, elr_el1
+mrs  x2, spsr_el1
+stp  lr, x0, [sp, #regsave_long_offset]
+stp  x1, x2, [sp, #regsave_long_offset + 16]
+.endm
+
+.macro regsave_short
+sub  sp, sp, #32
+push x16, x17
+push x14, x15
+push x12, x13
+push x10, x11
+push x8, x9
+push x6, x7
+push x4, x5
+push x2, x3
+push x0, x1
+add  x0, sp, #regsave_short_offset
+mrs  x1, elr_el1
+mrs  x2, spsr_el1
+stp  lr, x0, [sp, #regsave_short_offset]
+stp  x1, x2, [sp, #regsave_short_offset + 16]
+.endm
+
+.macro regrestore_long
+ldr  lr, [sp, #regsave_long_offset]
+ldp  x1, x2, [sp, #regsave_long_offset + 16]
+msr  elr_el1, x1
+msr  spsr_el1, x2
+pop x0, x1
+pop x2, x3
+pop x4, x5
+pop x6, x7
+pop x8, x9
+pop x10, x11
+pop x12, x13
+pop x14, x15
+pop x16, x17
+pop x18, x19
+pop x20, x21
+pop x22, x23
+pop x24, x25
+pop x26, x27
+pop x28, x29
+add sp, sp, #32
+.endm
+
+.macro regrestore_short
+ldr  lr, [sp, #regsave_short_offset]
+ldp  x1, x2, [sp, #regsave_short_offset + 16]
+msr  elr_el1, x1
+msr  spsr_el1, x2
+pop x0, x1
+pop x2, x3
+pop x4, x5
+pop x6, x7
+pop x8, x9
+pop x10, x11
+pop x12, x13
+pop x14, x15
+pop x16, x17
+add sp, sp, #32
+.endm
+
+.macro invalid_exception, which
+    regsave_long
+    mov x1, #\which
+    mov x0, sp
+    bl  arm64_invalid_exception
+    b   .
+.endm
+
+.macro irq_exception
+    regsave_short
+    msr daifclr, #1 /* reenable fiqs once elr and spsr have been saved */
+    mov x0, sp
+    bl  platform_irq
+    cbz x0, .Lirq_exception_no_preempt\@
+    bl  thread_preempt
+.Lirq_exception_no_preempt\@:
+    msr daifset, #1 /* disable fiqs to protect elr and spsr restore */
+    b   arm64_exc_shared_restore_short
+.endm
+
+FUNCTION(arm64_exception_base)
+
+/* exceptions from current EL, using SP0 */
+LOCAL_FUNCTION(arm64_sync_exc_current_el_SP0)
+    invalid_exception 0
+
+.org 0x080
+LOCAL_FUNCTION(arm64_irq_current_el_SP0)
+    invalid_exception 1
+
+.org 0x100
+LOCAL_FUNCTION(arm64_fiq_current_el_SP0)
+    invalid_exception 2
+
+.org 0x180
+LOCAL_FUNCTION(arm64_err_exc_current_el_SP0)
+    invalid_exception 3
+
+/* exceptions from current EL, using SPx */
+.org 0x200
+LOCAL_FUNCTION(arm64_sync_exc_current_el_SPx)
+    regsave_long
+    mov x0, sp
+    bl  arm64_sync_exception
+    b  arm64_exc_shared_restore_long
+
+.org 0x280
+LOCAL_FUNCTION(arm64_irq_current_el_SPx)
+    irq_exception
+
+.org 0x300
+LOCAL_FUNCTION(arm64_fiq_current_el_SPx)
+    regsave_short
+    mov x0, sp
+    bl  platform_fiq
+    b  arm64_exc_shared_restore_short
+
+.org 0x380
+LOCAL_FUNCTION(arm64_err_exc_current_el_SPx)
+    invalid_exception 0x13
+
+/* exceptions from lower EL, running arm64 */
+.org 0x400
+LOCAL_FUNCTION(arm64_sync_exc_lower_el_64)
+    invalid_exception 0x20
+
+.org 0x480
+LOCAL_FUNCTION(arm64_irq_lower_el_64)
+    invalid_exception 0x21
+
+.org 0x500
+LOCAL_FUNCTION(arm64_fiq_lower_el_64)
+    invalid_exception 0x22
+
+.org 0x580
+LOCAL_FUNCTION(arm64_err_exc_lower_el_64)
+    invalid_exception 0x23
+
+/* exceptions from lower EL, running arm32 */
+.org 0x600
+LOCAL_FUNCTION(arm64_sync_exc_lower_el_32)
+    regsave_long
+    mov x0, sp
+    bl  arm64_sync_exception
+    b  arm64_exc_shared_restore_long
+
+.org 0x680
+LOCAL_FUNCTION(arm64_irq_lower_el_32)
+    irq_exception
+
+.org 0x700
+LOCAL_FUNCTION(arm64_fiq_lower_el_32)
+    regsave_short
+    mov x0, sp
+    bl  platform_fiq
+    b  arm64_exc_shared_restore_short
+
+.org 0x780
+LOCAL_FUNCTION(arm64_err_exc_lower_el_32)
+    invalid_exception 0x33
+
+LOCAL_FUNCTION(arm64_exc_shared_restore_long)
+    regrestore_long
+    eret
+
+LOCAL_FUNCTION(arm64_exc_shared_restore_short)
+       regrestore_short
+       eret
diff --git a/src/bsp/lk/arch/arm64/exceptions_c.c b/src/bsp/lk/arch/arm64/exceptions_c.c
new file mode 100644
index 0000000..3ff211d
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/exceptions_c.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2014 Travis Geiselbrecht
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <stdio.h>
+#include <debug.h>
+#include <arch/arch_ops.h>
+#include <arch/arm64.h>
+
+#define SHUTDOWN_ON_FATAL 1
+
+struct fault_handler_table_entry {
+    uint64_t pc;
+    uint64_t fault_handler;
+};
+
+extern struct fault_handler_table_entry __fault_handler_table_start[];
+extern struct fault_handler_table_entry __fault_handler_table_end[];
+
+static void dump_iframe(const struct arm64_iframe_long *iframe)
+{
+    printf("iframe %p:\n", iframe);
+    printf("x0  0x%16llx x1  0x%16llx x2  0x%16llx x3  0x%16llx\n", iframe->r[0], iframe->r[1], iframe->r[2], iframe->r[3]);
+    printf("x4  0x%16llx x5  0x%16llx x6  0x%16llx x7  0x%16llx\n", iframe->r[4], iframe->r[5], iframe->r[6], iframe->r[7]);
+    printf("x8  0x%16llx x9  0x%16llx x10 0x%16llx x11 0x%16llx\n", iframe->r[8], iframe->r[9], iframe->r[10], iframe->r[11]);
+    printf("x12 0x%16llx x13 0x%16llx x14 0x%16llx x15 0x%16llx\n", iframe->r[12], iframe->r[13], iframe->r[14], iframe->r[15]);
+    printf("x16 0x%16llx x17 0x%16llx x18 0x%16llx x19 0x%16llx\n", iframe->r[16], iframe->r[17], iframe->r[18], iframe->r[19]);
+    printf("x20 0x%16llx x21 0x%16llx x22 0x%16llx x23 0x%16llx\n", iframe->r[20], iframe->r[21], iframe->r[22], iframe->r[23]);
+    printf("x24 0x%16llx x25 0x%16llx x26 0x%16llx x27 0x%16llx\n", iframe->r[24], iframe->r[25], iframe->r[26], iframe->r[27]);
+    printf("x28 0x%16llx x29 0x%16llx lr  0x%16llx sp  0x%16llx\n", iframe->r[28], iframe->r[29], iframe->r[30], iframe->r[31]);
+    printf("elr 0x%16llx\n", iframe->elr);
+    printf("spsr 0x%16llx\n", iframe->spsr);
+}
+
+void arm64_sync_exception(struct arm64_iframe_long *iframe)
+{
+    struct fault_handler_table_entry *fault_handler;
+    uint32_t esr = ARM64_READ_SYSREG(esr_el1);
+    uint32_t ec = esr >> 26;
+    uint32_t il = (esr >> 25) & 0x1;
+    uint32_t iss = esr & ((1<<24) - 1);
+
+#ifdef WITH_LIB_SYSCALL
+    if (ec == 0x15 || ec == 0x11) { // syscall 64/32
+        void arm64_syscall(struct arm64_iframe_long *iframe);
+        arch_enable_fiqs();
+        arm64_syscall(iframe);
+        arch_disable_fiqs();
+        return;
+    }
+#endif
+
+    /* floating point */
+    if (ec == 0x07) {
+        arm64_fpu_exception(iframe);
+        return;
+    }
+
+    for (fault_handler = __fault_handler_table_start; fault_handler < __fault_handler_table_end; fault_handler++) {
+        if (fault_handler->pc == iframe->elr) {
+            iframe->elr = fault_handler->fault_handler;
+            return;
+        }
+    }
+
+    printf("sync_exception\n");
+    dump_iframe(iframe);
+
+    printf("ESR 0x%x: ec 0x%x, il 0x%x, iss 0x%x\n", esr, ec, il, iss);
+
+    if (ec == 0x15) { // syscall
+        printf("syscall\n");
+        return;
+    }
+
+    panic("die\n");
+}
+
+void arm64_invalid_exception(struct arm64_iframe_long *iframe, unsigned int which)
+{
+    printf("invalid exception, which 0x%x\n", which);
+    dump_iframe(iframe);
+
+    panic("die\n");
+}
+
+
+
diff --git a/src/bsp/lk/arch/arm64/exceptions_el2_el3.S b/src/bsp/lk/arch/arm64/exceptions_el2_el3.S
new file mode 100644
index 0000000..8208a2b
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/exceptions_el2_el3.S
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2014 Travis Geiselbrecht
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <asm.h>
+#include <arch/arm64/mmu.h>
+#include <arch/asm_macros.h>
+
+/* use x9 ~ x15 as scratch registers */
+tmp     .req x9
+tmp2    .req x10
+
+#define ESR_EC_SHIFT    26
+#define ESR_EC_LENGTH   6
+#define EC_AARCH64_HVC  0x16
+#define EC_AARCH64_SMC  0x17
+
+.weak mtk_sip
+
+FUNCTION(setup_el2_or_el3_exception_base)
+    /* install el2 or el3 exception table */
+    ldr     tmp, =.Lel2_or_el3_exception_base
+#if WITH_KERNEL_VM
+    and     tmp, tmp, #~(~0 << MMU_KERNEL_SIZE_SHIFT)
+#endif
+    mrs     tmp2, CurrentEL
+    cmp     tmp2, #(0b11 << 2)  /* in EL3? */
+    b.eq    .Lin_el3
+    cmp     tmp2, #(0b10 << 2)  /* in EL2? */
+    b.eq    .Lin_el2
+.Lin_el3:
+    msr     vbar_el3, tmp
+    b       .Lexit
+.Lin_el2:
+    msr     vbar_el2, tmp
+.Lexit:
+    ret
+
+.section .text.boot.vectab
+.align 12
+
+/*
+ * The next boot stage after lk can be ATF (lk as bl2 bootloader), linux
+ * kernel or hypervisor (lk as bl33 bootloader). Different entry execution
+ * level is required for each next boot stage,
+ *      - ATF: from EL3
+ *      - linux kernel: from EL2 or EL1
+ *      - hypervisor: from EL2
+ * It's necessary for lk to return to its beginning entry level before jumping
+ * to next boot stage.
+ *
+ * SMC or HVC will be used for this purpose, thus we install only the exception
+ * vector to handle sync exception from lower exception level.
+ *
+ * [TODO] add the remaining exception vectors to catch unhandled exceptions.
+ */
+.Lel2_or_el3_exception_base:
+FUNCTION(arm64_el2_or_el3_exception_base)
+/* exceptions from lower EL, running arm64 */
+.org 0x400
+LOCAL_FUNCTION(arm64_sync_exc_lower_el_64)
+#if WITH_KERNEL_VM
+    mov     tmp, sp
+    and     sp, tmp, #~(~0 << MMU_KERNEL_SIZE_SHIFT)
+#endif
+    mrs     tmp, CurrentEL
+    cmp     tmp, #(0b11 << 2)   /* in EL3? */
+    b.ne    .LnotEL3
+    mrs     tmp, esr_el3
+    b       .Lcheck_ec
+
+.LnotEL3:
+    cmp     tmp, #(0b10 << 2)   /* in EL2? */
+    b.ne    .Lunhandled_sync_exc
+    mrs     tmp, esr_el2
+
+.Lcheck_ec:
+    ubfx    tmp, tmp, #ESR_EC_SHIFT, #ESR_EC_LENGTH
+    cmp     tmp, #EC_AARCH64_SMC
+    b.eq    .Lsip_handler
+    cmp     tmp, #EC_AARCH64_HVC
+    b.ne    .Lunhandled_sync_exc
+
+.Lsip_handler:
+    b       mtk_sip
+
+.Lunhandled_sync_exc:
+    b       .
+
diff --git a/src/bsp/lk/arch/arm64/fpu.c b/src/bsp/lk/arch/arm64/fpu.c
new file mode 100644
index 0000000..162b5c1
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/fpu.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2015 Google Inc. All rights reserved
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <arch/arm64.h>
+#include <kernel/thread.h>
+#include <trace.h>
+
+#define LOCAL_TRACE 0
+
+static struct fpstate *current_fpstate[SMP_MAX_CPUS];
+
+static void arm64_fpu_load_state(struct thread *t) /* make t's FPU/SIMD state live on the calling cpu (lazy restore) */
+{
+    uint cpu = arch_curr_cpu_num();
+    struct fpstate *fpstate = &t->arch.fpstate;
+
+    if (fpstate == current_fpstate[cpu] && fpstate->current_cpu == cpu) { /* state already resident on this cpu: nothing to do */
+        LTRACEF("cpu %d, thread %s, fpstate already valid\n", cpu, t->name);
+        return;
+    }
+    LTRACEF("cpu %d, thread %s, load fpstate %p, last cpu %d, last fpstate %p\n",
+            cpu, t->name, fpstate, fpstate->current_cpu, current_fpstate[cpu]);
+    fpstate->current_cpu = cpu; /* record residency before the actual register load */
+    current_fpstate[cpu] = fpstate;
+
+
+    STATIC_ASSERT(sizeof(fpstate->regs) == 16 * 32); /* regs[] must hold exactly q0-q31 (32 x 128-bit) */
+    __asm__ volatile("ldp     q0, q1, [%0, #(0 * 32)]\n"
+                     "ldp     q2, q3, [%0, #(1 * 32)]\n"
+                     "ldp     q4, q5, [%0, #(2 * 32)]\n"
+                     "ldp     q6, q7, [%0, #(3 * 32)]\n"
+                     "ldp     q8, q9, [%0, #(4 * 32)]\n"
+                     "ldp     q10, q11, [%0, #(5 * 32)]\n"
+                     "ldp     q12, q13, [%0, #(6 * 32)]\n"
+                     "ldp     q14, q15, [%0, #(7 * 32)]\n"
+                     "ldp     q16, q17, [%0, #(8 * 32)]\n"
+                     "ldp     q18, q19, [%0, #(9 * 32)]\n"
+                     "ldp     q20, q21, [%0, #(10 * 32)]\n"
+                     "ldp     q22, q23, [%0, #(11 * 32)]\n"
+                     "ldp     q24, q25, [%0, #(12 * 32)]\n"
+                     "ldp     q26, q27, [%0, #(13 * 32)]\n"
+                     "ldp     q28, q29, [%0, #(14 * 32)]\n"
+                     "ldp     q30, q31, [%0, #(15 * 32)]\n"
+                     "msr     fpcr, %1\n"
+                     "msr     fpsr, %2\n"
+                     :: "r"(fpstate),
+                     "r"((uint64_t)fpstate->fpcr),
+                     "r"((uint64_t)fpstate->fpsr));
+}
+
+void arm64_fpu_save_state(struct thread *t) /* spill the live q0-q31/fpcr/fpsr into t's fpstate */
+{
+    uint64_t fpcr, fpsr;
+    struct fpstate *fpstate = &t->arch.fpstate;
+    __asm__ volatile("stp     q0, q1, [%2, #(0 * 32)]\n"
+                     "stp     q2, q3, [%2, #(1 * 32)]\n"
+                     "stp     q4, q5, [%2, #(2 * 32)]\n"
+                     "stp     q6, q7, [%2, #(3 * 32)]\n"
+                     "stp     q8, q9, [%2, #(4 * 32)]\n"
+                     "stp     q10, q11, [%2, #(5 * 32)]\n"
+                     "stp     q12, q13, [%2, #(6 * 32)]\n"
+                     "stp     q14, q15, [%2, #(7 * 32)]\n"
+                     "stp     q16, q17, [%2, #(8 * 32)]\n"
+                     "stp     q18, q19, [%2, #(9 * 32)]\n"
+                     "stp     q20, q21, [%2, #(10 * 32)]\n"
+                     "stp     q22, q23, [%2, #(11 * 32)]\n"
+                     "stp     q24, q25, [%2, #(12 * 32)]\n"
+                     "stp     q26, q27, [%2, #(13 * 32)]\n"
+                     "stp     q28, q29, [%2, #(14 * 32)]\n"
+                     "stp     q30, q31, [%2, #(15 * 32)]\n"
+                     "mrs     %0, fpcr\n"
+                     "mrs     %1, fpsr\n"
+                     : "=r"(fpcr), "=r"(fpsr)
+                     : "r"(fpstate));
+
+    fpstate->fpcr = fpcr; /* narrow to uint32_t; upper bits of fpcr/fpsr are RES0 */
+    fpstate->fpsr = fpsr;
+
+    LTRACEF("thread %s, fpcr %x, fpsr %x\n", t->name, fpstate->fpcr, fpstate->fpsr);
+}
+
+void arm64_fpu_exception(struct arm64_iframe_long *iframe) /* lazy-FPU trap handler: enable FP access and restore thread state */
+{
+    uint64_t cpacr = ARM64_READ_SYSREG(cpacr_el1);
+    if (((cpacr >> 20) & 3) != 3) { /* CPACR_EL1.FPEN (bits 21:20) not fully enabled: the trap was ours */
+        cpacr |= 3 << 20; /* grant EL0/EL1 FP/SIMD access */
+        ARM64_WRITE_SYSREG(cpacr_el1, cpacr);
+        thread_t *t = get_current_thread();
+        if (likely(t))
+            arm64_fpu_load_state(t); /* pull the thread's saved state onto this cpu */
+        return;
+    }
+}
diff --git a/src/bsp/lk/arch/arm64/include/arch/arch_ops.h b/src/bsp/lk/arch/arm64/include/arch/arch_ops.h
new file mode 100644
index 0000000..6c76c2b
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/include/arch/arch_ops.h
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2008-2014 Travis Geiselbrecht
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#pragma once
+
+#ifndef ASSEMBLY
+
+#include <stdbool.h>
+#include <compiler.h>
+#include <reg.h>
+#include <arch/arm64.h>
+
+#define USE_GCC_ATOMICS 1
+#define ENABLE_CYCLE_COUNTER 1
+
+// override of some routines
+static inline void arch_enable_ints(void) /* unmask IRQs on the calling cpu */
+{
+    CF; /* compiler barrier before unmasking */
+    __asm__ volatile("msr daifclr, #2" ::: "memory"); /* clear DAIF.I */
+}
+
+static inline void arch_disable_ints(void) /* mask IRQs on the calling cpu */
+{
+    __asm__ volatile("msr daifset, #2" ::: "memory"); /* set DAIF.I */
+    CF; /* compiler barrier after masking */
+}
+
+static inline bool arch_ints_disabled(void) /* true when IRQs are masked */
+{
+    unsigned long state;
+
+    __asm__ volatile("mrs %0, daif" : "=r"(state));
+    state &= (1<<7); /* DAIF.I is bit 7 */
+
+    return !!state;
+}
+
+static inline void arch_enable_fiqs(void) /* unmask FIQs on the calling cpu */
+{
+    CF; /* compiler barrier before unmasking */
+    __asm__ volatile("msr daifclr, #1" ::: "memory"); /* clear DAIF.F */
+}
+
+static inline void arch_disable_fiqs(void) /* mask FIQs on the calling cpu */
+{
+    __asm__ volatile("msr daifset, #1" ::: "memory"); /* set DAIF.F */
+    CF; /* compiler barrier after masking */
+}
+
+// true when FIQs are masked (DAIF.F set)
+static inline bool arch_fiqs_disabled(void)
+{
+    unsigned long state;
+
+    __asm__ volatile("mrs %0, daif" : "=r"(state));
+    state &= (1<<6); /* DAIF.F is bit 6 */
+
+    return !!state;
+}
+
+#define mb()        __asm__ volatile("dsb sy" : : : "memory")
+#define rmb()       __asm__ volatile("dsb ld" : : : "memory")
+#define wmb()       __asm__ volatile("dsb st" : : : "memory")
+
+#ifdef WITH_SMP
+#define smp_mb()    __asm__ volatile("dmb ish" : : : "memory")
+#define smp_rmb()   __asm__ volatile("dmb ishld" : : : "memory")
+#define smp_wmb()   __asm__ volatile("dmb ishst" : : : "memory")
+#else
+#define smp_mb()    CF
+#define smp_wmb()   CF
+#define smp_rmb()   CF
+#endif
+
+static inline int atomic_add(volatile int *ptr, int val) /* atomically *ptr += val; returns prior value (relaxed ordering) */
+{
+#if USE_GCC_ATOMICS
+    return __atomic_fetch_add(ptr, val, __ATOMIC_RELAXED);
+#else /* ARM32 ldrex/strex fallback -- dead code on arm64 since USE_GCC_ATOMICS=1 above */
+    int old;
+    int temp;
+    int test;
+
+    do {
+        __asm__ volatile(
+            "ldrex  %[old], [%[ptr]]\n"
+            "adds   %[temp], %[old], %[val]\n"
+            "strex  %[test], %[temp], [%[ptr]]\n"
+            : [old]"=&r" (old), [temp]"=&r" (temp), [test]"=&r" (test)
+            : [ptr]"r" (ptr), [val]"r" (val)
+            : "memory", "cc");
+
+    } while (test != 0); /* retry until the exclusive store succeeds */
+
+    return old;
+#endif
+}
+
+static inline int atomic_or(volatile int *ptr, int val) /* atomically *ptr |= val; returns prior value (relaxed ordering) */
+{
+#if USE_GCC_ATOMICS
+    return __atomic_fetch_or(ptr, val, __ATOMIC_RELAXED);
+#else /* ARM32 ldrex/strex fallback -- dead code on arm64 since USE_GCC_ATOMICS=1 above */
+    int old;
+    int temp;
+    int test;
+
+    do {
+        __asm__ volatile(
+            "ldrex  %[old], [%[ptr]]\n"
+            "orrs   %[temp], %[old], %[val]\n"
+            "strex  %[test], %[temp], [%[ptr]]\n"
+            : [old]"=&r" (old), [temp]"=&r" (temp), [test]"=&r" (test)
+            : [ptr]"r" (ptr), [val]"r" (val)
+            : "memory", "cc");
+
+    } while (test != 0); /* retry until the exclusive store succeeds */
+
+    return old;
+#endif
+}
+
+static inline int atomic_and(volatile int *ptr, int val) /* atomically *ptr &= val; returns prior value (relaxed ordering) */
+{
+#if USE_GCC_ATOMICS
+    return __atomic_fetch_and(ptr, val, __ATOMIC_RELAXED);
+#else /* ARM32 ldrex/strex fallback -- dead code on arm64 since USE_GCC_ATOMICS=1 above */
+    int old;
+    int temp;
+    int test;
+
+    do {
+        __asm__ volatile(
+            "ldrex  %[old], [%[ptr]]\n"
+            "ands   %[temp], %[old], %[val]\n"
+            "strex  %[test], %[temp], [%[ptr]]\n"
+            : [old]"=&r" (old), [temp]"=&r" (temp), [test]"=&r" (test)
+            : [ptr]"r" (ptr), [val]"r" (val)
+            : "memory", "cc");
+
+    } while (test != 0); /* retry until the exclusive store succeeds */
+
+    return old;
+#endif
+}
+
+static inline int atomic_swap(volatile int *ptr, int val) /* atomically *ptr = val; returns prior value (relaxed ordering) */
+{
+#if USE_GCC_ATOMICS
+    return __atomic_exchange_n(ptr, val, __ATOMIC_RELAXED);
+#else /* ARM32 ldrex/strex fallback -- dead code on arm64 since USE_GCC_ATOMICS=1 above */
+    int old;
+    int test;
+
+    do {
+        __asm__ volatile(
+            "ldrex  %[old], [%[ptr]]\n"
+            "strex  %[test], %[val], [%[ptr]]\n"
+            : [old]"=&r" (old), [test]"=&r" (test)
+            : [ptr]"r" (ptr), [val]"r" (val)
+            : "memory");
+
+    } while (test != 0); /* retry until the exclusive store succeeds */
+
+    return old;
+#endif
+}
+
+static inline int atomic_cmpxchg(volatile int *ptr, int oldval, int newval) /* CAS: returns the value observed at *ptr (== oldval on success) */
+{
+#if USE_GCC_ATOMICS
+    __atomic_compare_exchange_n(ptr, &oldval, newval, false,
+                                __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+    return oldval; /* on failure the builtin updates oldval to the actual value */
+#else /* ARM32 ldrex/strex fallback -- dead code on arm64 since USE_GCC_ATOMICS=1 above */
+    int old;
+    int test;
+
+    do {
+        __asm__ volatile(
+            "ldrex  %[old], [%[ptr]]\n"
+            "mov    %[test], #0\n"
+            "teq    %[old], %[oldval]\n"
+#if ARM_ISA_ARMV7M
+            "bne    0f\n"
+            "strex  %[test], %[newval], [%[ptr]]\n"
+            "0:\n"
+#else
+            "strexeq %[test], %[newval], [%[ptr]]\n"
+#endif
+            : [old]"=&r" (old), [test]"=&r" (test)
+            : [ptr]"r" (ptr), [oldval]"Ir" (oldval), [newval]"r" (newval)
+            : "cc");
+
+    } while (test != 0); /* retry until store succeeds or comparison fails (test stays 0) */
+
+    return old;
+#endif
+}
+
+static inline uint32_t arch_cycle_count(void) /* best-effort cycle counter; returns 0 when unsupported */
+{
+#if ARM_ISA_ARMV7M /* fixed: was ARM_ISA_ARM7M, inconsistent with the ARM_ISA_ARMV7M guard used in atomic_cmpxchg above */
+#if ENABLE_CYCLE_COUNTER
+#define DWT_CYCCNT (0xE0001004)
+    return *REG32(DWT_CYCCNT);
+#else
+    return 0;
+#endif
+#elif ARM_ISA_ARMV7
+    uint32_t count;
+    __asm__ volatile("mrc       p15, 0, %0, c9, c13, 0" /* read PMU cycle count register */
+        : "=r" (count)
+        );
+    return count;
+#else
+//#warning no arch_cycle_count implementation
+    return 0; /* arm64 build takes this branch: no implementation */
+#endif
+}
+
+/* use the cpu local thread context pointer (tpidr_el1) to store current_thread */
+static inline struct thread *get_current_thread(void)
+{
+    return (struct thread *)ARM64_READ_SYSREG(tpidr_el1);
+}
+
+static inline void set_current_thread(struct thread *t) /* publish t as this cpu's current thread */
+{
+    ARM64_WRITE_SYSREG(tpidr_el1, (uint64_t)t); /* the write macro issues an ISB afterwards */
+}
+
+#if WITH_SMP
+extern const uint8_t *linear_cpuid_map; /* optional MPIDR->linear cpu id remap table (weak symbol; may be NULL) */
+
+static inline uint arch_curr_cpu_num(void) /* derive this cpu's linear id from MPIDR_EL1 */
+{
+    uint64_t mpidr =  ARM64_READ_SYSREG(mpidr_el1);
+    mpidr = ((mpidr & ((1U << SMP_CPU_ID_BITS) - 1)) >> 8 << SMP_CPU_CLUSTER_SHIFT) | (mpidr & 0xff); /* fold Aff1 (cluster) and Aff0 (core) into one index */
+    return linear_cpuid_map ? *(linear_cpuid_map + mpidr) : mpidr;
+}
+#else
+static inline uint arch_curr_cpu_num(void)
+{
+    return 0; /* uniprocessor build: always cpu 0 */
+}
+#endif
+
+#endif // ASSEMBLY
+
diff --git a/src/bsp/lk/arch/arm64/include/arch/arch_thread.h b/src/bsp/lk/arch/arm64/include/arch/arch_thread.h
new file mode 100644
index 0000000..922ff14
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/include/arch/arch_thread.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2014 Travis Geiselbrecht
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#pragma once
+
+#include <sys/types.h>
+
+struct fpstate {
+    uint64_t    regs[64];   /* q0-q31: 32 x 128-bit SIMD/FP registers */
+    uint32_t    fpcr;       /* saved FP control register */
+    uint32_t    fpsr;       /* saved FP status register */
+    uint        current_cpu; /* cpu this state was last loaded on (lazy FPU bookkeeping) */
+};
+
+struct arch_thread {
+    vaddr_t sp;             /* saved kernel stack pointer for context switch */
+    struct fpstate fpstate __attribute__((aligned(0x10))); /* 16-byte aligned for q-register ldp/stp */
+};
+
diff --git a/src/bsp/lk/arch/arm64/include/arch/arm64.h b/src/bsp/lk/arch/arm64/include/arch/arm64.h
new file mode 100644
index 0000000..173b03b
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/include/arch/arm64.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2014 Travis Geiselbrecht
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+#include <compiler.h>
+
+__BEGIN_CDECLS
+
+#define DSB __asm__ volatile("dsb sy" ::: "memory")
+#define ISB __asm__ volatile("isb" ::: "memory")
+
+#define STRINGIFY(x) #x
+#define TOSTRING(x) STRINGIFY(x)
+
+#define ARM64_READ_SYSREG(reg) \
+({ \
+    uint64_t _val; \
+    __asm__ volatile("mrs %0," TOSTRING(reg) : "=r" (_val)); \
+    _val; \
+})
+
+#define ARM64_WRITE_SYSREG(reg, val) \
+({ \
+    __asm__ volatile("msr " TOSTRING(reg) ", %0" :: "r" (val)); \
+    ISB; \
+})
+
+void arm64_context_switch(vaddr_t *old_sp, vaddr_t new_sp);
+
+/* exception handling */
+struct arm64_iframe_long {
+    uint64_t r[32];  /* general registers; exact layout fixed by the asm exception entry (not visible here) */
+    uint64_t elr;    /* exception link register (return address) */
+    uint64_t spsr;   /* saved program status register */
+};
+
+struct arm64_iframe_short {
+    uint64_t r[20];  /* caller-saved subset; layout fixed by the asm exception entry (not visible here) */
+    uint64_t elr;    /* exception link register (return address) */
+    uint64_t spsr;   /* saved program status register */
+};
+
+struct thread;
+extern void arm64_exception_base(void);
+extern void arm64_el2_or_el3_exception_base(void);
+void arm64_elX_to_el1(void);
+void arm64_fpu_exception(struct arm64_iframe_long *iframe);
+void arm64_fpu_save_state(struct thread *thread);
+void arm64_chain_load(paddr_t entry, ulong arg0, ulong arg1, ulong arg2, ulong arg3) __NO_RETURN;
+
+static inline void arm64_fpu_pre_context_switch(struct thread *thread) /* save outgoing thread's FPU state and re-arm the lazy trap */
+{
+    uint64_t cpacr = ARM64_READ_SYSREG(cpacr_el1);
+    if ((cpacr >> 20) & 3) { /* CPACR_EL1.FPEN set: FP was used, state may be dirty */
+        arm64_fpu_save_state(thread);
+        cpacr &= ~(3 << 20); /* disable FP access so the next use traps to arm64_fpu_exception */
+        ARM64_WRITE_SYSREG(cpacr_el1, cpacr);
+    }
+}
+
+__END_CDECLS
+
diff --git a/src/bsp/lk/arch/arm64/include/arch/arm64/mmu.h b/src/bsp/lk/arch/arm64/include/arch/arm64/mmu.h
new file mode 100644
index 0000000..bd6aaf2
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/include/arch/arm64/mmu.h
@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2014 Google Inc. All rights reserved
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __ARCH_ARM64_MMU_H
+#define __ARCH_ARM64_MMU_H
+
+#include <arch/defines.h>
+
+#define IFTE(c,t,e) (!!(c) * (t) | !(c) * (e))
+#define NBITS01(n)      IFTE(n, 1, 0)
+#define NBITS02(n)      IFTE((n) >>  1,  1 + NBITS01((n) >>  1), NBITS01(n))
+#define NBITS04(n)      IFTE((n) >>  2,  2 + NBITS02((n) >>  2), NBITS02(n))
+#define NBITS08(n)      IFTE((n) >>  4,  4 + NBITS04((n) >>  4), NBITS04(n))
+#define NBITS16(n)      IFTE((n) >>  8,  8 + NBITS08((n) >>  8), NBITS08(n))
+#define NBITS32(n)      IFTE((n) >> 16, 16 + NBITS16((n) >> 16), NBITS16(n))
+#define NBITS(n)        IFTE((n) >> 32, 32 + NBITS32((n) >> 32), NBITS32(n))
+
+#ifndef MMU_KERNEL_SIZE_SHIFT
+#define KERNEL_ASPACE_BITS (NBITS(0xffffffffffffffff-KERNEL_ASPACE_BASE))
+#define KERNEL_BASE_BITS (NBITS(0xffffffffffffffff-KERNEL_BASE))
+#if KERNEL_BASE_BITS > KERNEL_ASPACE_BITS
+#define KERNEL_ASPACE_BITS KERNEL_BASE_BITS /* KERNEL_BASE should not be below KERNEL_ASPACE_BASE */
+#endif
+
+#if KERNEL_ASPACE_BITS < 25
+#define MMU_KERNEL_SIZE_SHIFT (25)
+#else
+#define MMU_KERNEL_SIZE_SHIFT (KERNEL_ASPACE_BITS)
+#endif
+#endif
+
+#ifndef MMU_USER_SIZE_SHIFT
+#define MMU_USER_SIZE_SHIFT 48
+#endif
+
+#ifndef MMU_IDENT_SIZE_SHIFT
+#define MMU_IDENT_SIZE_SHIFT 42 /* Max size supported by block mappings */
+#endif
+
+#define MMU_KERNEL_PAGE_SIZE_SHIFT      (PAGE_SIZE_SHIFT)
+#define MMU_USER_PAGE_SIZE_SHIFT        (USER_PAGE_SIZE_SHIFT)
+
+#if MMU_IDENT_SIZE_SHIFT < 25
+#error MMU_IDENT_SIZE_SHIFT too small
+#elif MMU_IDENT_SIZE_SHIFT <= 29 /* Use 2MB block mappings (4K page size) */
+#define MMU_IDENT_PAGE_SIZE_SHIFT       (SHIFT_4K)
+#elif MMU_IDENT_SIZE_SHIFT <= 30 /* Use 512MB block mappings (64K page size) */
+#define MMU_IDENT_PAGE_SIZE_SHIFT       (SHIFT_64K)
+#elif MMU_IDENT_SIZE_SHIFT <= 39 /* Use 1GB block mappings (4K page size) */
+#define MMU_IDENT_PAGE_SIZE_SHIFT       (SHIFT_4K)
+#elif MMU_IDENT_SIZE_SHIFT <= 42 /* Use 512MB block mappings (64K page size) */
+#define MMU_IDENT_PAGE_SIZE_SHIFT       (SHIFT_64K)
+#else
+#error MMU_IDENT_SIZE_SHIFT too large
+#endif
+
+/*
+ * TCR TGx values
+ *
+ * Page size:   4K      16K     64K
+ * TG0:         0       2       1
+ * TG1:         2       1       3
+ */
+
+#define MMU_TG0(page_size_shift) ((((page_size_shift == 14) & 1) << 1) | \
+                                  ((page_size_shift == 16) & 1))
+
+#define MMU_TG1(page_size_shift) ((((page_size_shift == 12) & 1) << 1) | \
+                                  ((page_size_shift == 14) & 1) | \
+                                  ((page_size_shift == 16) & 1) | \
+                                  (((page_size_shift == 16) & 1) << 1))
+
+#define MMU_LX_X(page_shift, level) ((4 - (level)) * ((page_shift) - 3) + 3)
+
+#if MMU_USER_SIZE_SHIFT > MMU_LX_X(MMU_USER_PAGE_SIZE_SHIFT, 0)
+#define MMU_USER_TOP_SHIFT MMU_LX_X(MMU_USER_PAGE_SIZE_SHIFT, 0)
+#elif MMU_USER_SIZE_SHIFT > MMU_LX_X(MMU_USER_PAGE_SIZE_SHIFT, 1)
+#define MMU_USER_TOP_SHIFT MMU_LX_X(MMU_USER_PAGE_SIZE_SHIFT, 1)
+#elif MMU_USER_SIZE_SHIFT > MMU_LX_X(MMU_USER_PAGE_SIZE_SHIFT, 2)
+#define MMU_USER_TOP_SHIFT MMU_LX_X(MMU_USER_PAGE_SIZE_SHIFT, 2)
+#elif MMU_USER_SIZE_SHIFT > MMU_LX_X(MMU_USER_PAGE_SIZE_SHIFT, 3)
+#define MMU_USER_TOP_SHIFT MMU_LX_X(MMU_USER_PAGE_SIZE_SHIFT, 3)
+#else
+#error User address space size must be larger than page size
+#endif
+#define MMU_USER_PAGE_TABLE_ENTRIES_TOP (0x1 << (MMU_USER_SIZE_SHIFT - MMU_USER_TOP_SHIFT))
+
+#if MMU_KERNEL_SIZE_SHIFT > MMU_LX_X(MMU_KERNEL_PAGE_SIZE_SHIFT, 0)
+#define MMU_KERNEL_TOP_SHIFT MMU_LX_X(MMU_KERNEL_PAGE_SIZE_SHIFT, 0)
+#elif MMU_KERNEL_SIZE_SHIFT > MMU_LX_X(MMU_KERNEL_PAGE_SIZE_SHIFT, 1)
+#define MMU_KERNEL_TOP_SHIFT MMU_LX_X(MMU_KERNEL_PAGE_SIZE_SHIFT, 1)
+#elif MMU_KERNEL_SIZE_SHIFT > MMU_LX_X(MMU_KERNEL_PAGE_SIZE_SHIFT, 2)
+#define MMU_KERNEL_TOP_SHIFT MMU_LX_X(MMU_KERNEL_PAGE_SIZE_SHIFT, 2)
+#elif MMU_KERNEL_SIZE_SHIFT > MMU_LX_X(MMU_KERNEL_PAGE_SIZE_SHIFT, 3)
+#define MMU_KERNEL_TOP_SHIFT MMU_LX_X(MMU_KERNEL_PAGE_SIZE_SHIFT, 3)
+#else
+#error Kernel address space size must be larger than page size
+#endif
+#define MMU_KERNEL_PAGE_TABLE_ENTRIES_TOP (0x1 << (MMU_KERNEL_SIZE_SHIFT - MMU_KERNEL_TOP_SHIFT))
+
+#if MMU_IDENT_SIZE_SHIFT > MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 0)
+#define MMU_IDENT_TOP_SHIFT MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 0)
+#elif MMU_IDENT_SIZE_SHIFT > MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 1)
+#define MMU_IDENT_TOP_SHIFT MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 1)
+#elif MMU_IDENT_SIZE_SHIFT > MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 2)
+#define MMU_IDENT_TOP_SHIFT MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 2)
+#elif MMU_IDENT_SIZE_SHIFT > MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 3)
+#define MMU_IDENT_TOP_SHIFT MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 3)
+#else
+#error Ident address space size must be larger than page size
+#endif
+#define MMU_PAGE_TABLE_ENTRIES_IDENT_SHIFT (MMU_IDENT_SIZE_SHIFT - MMU_IDENT_TOP_SHIFT)
+#define MMU_PAGE_TABLE_ENTRIES_IDENT (0x1 << MMU_PAGE_TABLE_ENTRIES_IDENT_SHIFT)
+
+#define MMU_PTE_DESCRIPTOR_BLOCK_MAX_SHIFT      (30)
+
+#ifndef ASSEMBLY
+#define BM(base, count, val) (((val) & ((1UL << (count)) - 1)) << (base))
+#else
+#define BM(base, count, val) (((val) & ((0x1 << (count)) - 1)) << (base))
+#endif
+
+#define MMU_SH_NON_SHAREABLE                    (0)
+#define MMU_SH_OUTER_SHAREABLE                  (2)
+#define MMU_SH_INNER_SHAREABLE                  (3)
+
+#define MMU_RGN_NON_CACHEABLE                   (0)
+#define MMU_RGN_WRITE_BACK_ALLOCATE             (1)
+#define MMU_RGN_WRITE_THROUGH_NO_ALLOCATE       (2)
+#define MMU_RGN_WRITE_BACK_NO_ALLOCATE          (3)
+
+#define MMU_TCR_TBI1                            BM(38, 1, 1)
+#define MMU_TCR_TBI0                            BM(37, 1, 1)
+#define MMU_TCR_AS                              BM(36, 1, 1)
+#define MMU_TCR_IPS(size)                       BM(32, 3, (size))
+#define MMU_TCR_TG1(granule_size)               BM(30, 2, (granule_size))
+#define MMU_TCR_SH1(shareability_flags)         BM(28, 2, (shareability_flags))
+#define MMU_TCR_ORGN1(cache_flags)              BM(26, 2, (cache_flags))
+#define MMU_TCR_IRGN1(cache_flags)              BM(24, 2, (cache_flags))
+#define MMU_TCR_EPD1                            BM(23, 1, 1)
+#define MMU_TCR_A1                              BM(22, 1, 1)
+#define MMU_TCR_T1SZ(size)                      BM(16, 6, (size))
+#define MMU_TCR_TG0(granule_size)               BM(14, 2, (granule_size))
+#define MMU_TCR_SH0(shareability_flags)         BM(12, 2, (shareability_flags))
+#define MMU_TCR_ORGN0(cache_flags)              BM(10, 2, (cache_flags))
+#define MMU_TCR_IRGN0(cache_flags)              BM( 8, 2, (cache_flags))
+#define MMU_TCR_EPD0                            BM( 7, 1, 1)
+#define MMU_TCR_T0SZ(size)                      BM( 0, 6, (size))
+
+#define MMU_MAIR_ATTR(index, attr)              BM(index * 8, 8, (attr))
+
+
+/* L0/L1/L2/L3 descriptor types */
+#define MMU_PTE_DESCRIPTOR_INVALID              BM(0, 2, 0)
+#define MMU_PTE_DESCRIPTOR_MASK                 BM(0, 2, 3)
+
+/* L0/L1/L2 descriptor types */
+#define MMU_PTE_L012_DESCRIPTOR_BLOCK           BM(0, 2, 1)
+#define MMU_PTE_L012_DESCRIPTOR_TABLE           BM(0, 2, 3)
+
+/* L3 descriptor types */
+#define MMU_PTE_L3_DESCRIPTOR_PAGE              BM(0, 2, 3)
+
+/* Output address mask */
+#define MMU_PTE_OUTPUT_ADDR_MASK                BM(12, 36, 0xfffffffff)
+
+/* Table attrs */
+#define MMU_PTE_ATTR_NS_TABLE                   BM(63, 1, 1)
+#define MMU_PTE_ATTR_AP_TABLE_NO_WRITE          BM(62, 1, 1)
+#define MMU_PTE_ATTR_AP_TABLE_NO_EL0            BM(61, 1, 1)
+#define MMU_PTE_ATTR_UXN_TABLE                  BM(60, 1, 1)
+#define MMU_PTE_ATTR_PXN_TABLE                  BM(59, 1, 1)
+
+/* Block/Page attrs */
+#define MMU_PTE_ATTR_RES_SOFTWARE               BM(55, 4, 0xf)
+#define MMU_PTE_ATTR_UXN                        BM(54, 1, 1)
+#define MMU_PTE_ATTR_PXN                        BM(53, 1, 1)
+#define MMU_PTE_ATTR_CONTIGUOUS                 BM(52, 1, 1)
+
+#define MMU_PTE_ATTR_NON_GLOBAL                 BM(11, 1, 1)
+#define MMU_PTE_ATTR_AF                         BM(10, 1, 1)
+
+#define MMU_PTE_ATTR_SH_NON_SHAREABLE           BM(8, 2, 0)
+#define MMU_PTE_ATTR_SH_OUTER_SHAREABLE         BM(8, 2, 2)
+#define MMU_PTE_ATTR_SH_INNER_SHAREABLE         BM(8, 2, 3)
+
+#define MMU_PTE_ATTR_AP_P_RW_U_NA               BM(6, 2, 0)
+#define MMU_PTE_ATTR_AP_P_RW_U_RW               BM(6, 2, 1)
+#define MMU_PTE_ATTR_AP_P_RO_U_NA               BM(6, 2, 2)
+#define MMU_PTE_ATTR_AP_P_RO_U_RO               BM(6, 2, 3)
+#define MMU_PTE_ATTR_AP_MASK                    BM(6, 2, 3)
+
+#define MMU_PTE_ATTR_NON_SECURE                 BM(5, 1, 1)
+
+#define MMU_PTE_ATTR_ATTR_INDEX(attrindex)      BM(2, 3, attrindex)
+#define MMU_PTE_ATTR_ATTR_INDEX_MASK            MMU_PTE_ATTR_ATTR_INDEX(7)
+
+/* Default configuration for main kernel page table:
+ *    - do cached translation walks
+ */
+
+/* Device-nGnRnE memory */
+#define MMU_MAIR_ATTR0                  MMU_MAIR_ATTR(0, 0x00)
+#define MMU_PTE_ATTR_STRONGLY_ORDERED   MMU_PTE_ATTR_ATTR_INDEX(0)
+
+/* Device-nGnRE memory */
+#define MMU_MAIR_ATTR1                  MMU_MAIR_ATTR(1, 0x04)
+#define MMU_PTE_ATTR_DEVICE             MMU_PTE_ATTR_ATTR_INDEX(1)
+
+/* Normal Memory, Outer Write-back non-transient Read/Write allocate,
+ * Inner Write-back non-transient Read/Write allocate
+ */
+#define MMU_MAIR_ATTR2                  MMU_MAIR_ATTR(2, 0xff)
+#define MMU_PTE_ATTR_NORMAL_MEMORY      MMU_PTE_ATTR_ATTR_INDEX(2)
+
+#define MMU_MAIR_ATTR3                  (0)
+#define MMU_MAIR_ATTR4                  (0)
+#define MMU_MAIR_ATTR5                  (0)
+#define MMU_MAIR_ATTR6                  (0)
+#define MMU_MAIR_ATTR7                  (0)
+
+#define MMU_MAIR_VAL                    (MMU_MAIR_ATTR0 | MMU_MAIR_ATTR1 | \
+                                         MMU_MAIR_ATTR2 | MMU_MAIR_ATTR3 | \
+                                         MMU_MAIR_ATTR4 | MMU_MAIR_ATTR5 | \
+                                         MMU_MAIR_ATTR6 | MMU_MAIR_ATTR7 )
+
+#define MMU_TCR_IPS_DEFAULT MMU_TCR_IPS(2) /* TODO: read at runtime, or configure per platform */
+
+/* Enable cached page table walks:
+ * inner/outer (IRGN/ORGN): write-back + write-allocate
+ */
+#define MMU_TCR_FLAGS1 (MMU_TCR_TG1(MMU_TG1(MMU_KERNEL_PAGE_SIZE_SHIFT)) | \
+                        MMU_TCR_SH1(MMU_SH_INNER_SHAREABLE) | \
+                        MMU_TCR_ORGN1(MMU_RGN_WRITE_BACK_ALLOCATE) | \
+                        MMU_TCR_IRGN1(MMU_RGN_WRITE_BACK_ALLOCATE) | \
+                        MMU_TCR_T1SZ(64 - MMU_KERNEL_SIZE_SHIFT))
+#define MMU_TCR_FLAGS0 (MMU_TCR_TG0(MMU_TG0(MMU_USER_PAGE_SIZE_SHIFT)) | \
+                        MMU_TCR_SH0(MMU_SH_INNER_SHAREABLE) | \
+                        MMU_TCR_ORGN0(MMU_RGN_WRITE_BACK_ALLOCATE) | \
+                        MMU_TCR_IRGN0(MMU_RGN_WRITE_BACK_ALLOCATE) | \
+                        MMU_TCR_T0SZ(64 - MMU_USER_SIZE_SHIFT))
+#define MMU_TCR_FLAGS0_IDENT \
+                       (MMU_TCR_TG0(MMU_TG0(MMU_IDENT_PAGE_SIZE_SHIFT)) | \
+                        MMU_TCR_SH0(MMU_SH_INNER_SHAREABLE) | \
+                        MMU_TCR_ORGN0(MMU_RGN_WRITE_BACK_ALLOCATE) | \
+                        MMU_TCR_IRGN0(MMU_RGN_WRITE_BACK_ALLOCATE) | \
+                        MMU_TCR_T0SZ(64 - MMU_IDENT_SIZE_SHIFT))
+#define MMU_TCR_FLAGS_IDENT (MMU_TCR_IPS_DEFAULT | MMU_TCR_FLAGS1 | MMU_TCR_FLAGS0_IDENT)
+#define MMU_TCR_FLAGS_KERNEL (MMU_TCR_IPS_DEFAULT | MMU_TCR_FLAGS1 | MMU_TCR_FLAGS0 | MMU_TCR_EPD0)
+#define MMU_TCR_FLAGS_USER (MMU_TCR_IPS_DEFAULT | MMU_TCR_FLAGS1 | MMU_TCR_FLAGS0)
+
+
+#if MMU_IDENT_SIZE_SHIFT > MMU_LX_X(MMU_IDENT_PAGE_SIZE_SHIFT, 2)
+#define MMU_PTE_IDENT_DESCRIPTOR MMU_PTE_L012_DESCRIPTOR_BLOCK
+#else
+#define MMU_PTE_IDENT_DESCRIPTOR MMU_PTE_L3_DESCRIPTOR_PAGE
+#endif
+#define MMU_PTE_IDENT_FLAGS \
+    (MMU_PTE_IDENT_DESCRIPTOR | \
+     MMU_PTE_ATTR_AF | \
+     MMU_PTE_ATTR_SH_INNER_SHAREABLE | \
+     MMU_PTE_ATTR_NORMAL_MEMORY | \
+     MMU_PTE_ATTR_AP_P_RW_U_NA)
+
+#define MMU_PTE_KERNEL_RO_FLAGS \
+    (MMU_PTE_ATTR_UXN | \
+     MMU_PTE_ATTR_AF | \
+     MMU_PTE_ATTR_SH_INNER_SHAREABLE | \
+     MMU_PTE_ATTR_NORMAL_MEMORY | \
+     MMU_PTE_ATTR_AP_P_RO_U_NA)
+
+#define MMU_PTE_KERNEL_DATA_FLAGS \
+    (MMU_PTE_ATTR_UXN | \
+     MMU_PTE_ATTR_PXN | \
+     MMU_PTE_ATTR_AF | \
+     MMU_PTE_ATTR_SH_INNER_SHAREABLE | \
+     MMU_PTE_ATTR_NORMAL_MEMORY | \
+     MMU_PTE_ATTR_AP_P_RW_U_NA)
+
+#define MMU_INITIAL_MAP_STRONGLY_ORDERED \
+    (MMU_PTE_ATTR_UXN | \
+     MMU_PTE_ATTR_PXN | \
+     MMU_PTE_ATTR_AF | \
+     MMU_PTE_ATTR_STRONGLY_ORDERED | \
+     MMU_PTE_ATTR_AP_P_RW_U_NA)
+
+#define MMU_INITIAL_MAP_DEVICE \
+    (MMU_PTE_ATTR_UXN | \
+     MMU_PTE_ATTR_PXN | \
+     MMU_PTE_ATTR_AF | \
+     MMU_PTE_ATTR_DEVICE | \
+     MMU_PTE_ATTR_AP_P_RW_U_NA)
+
+#ifndef ASSEMBLY
+
+#include <sys/types.h>
+#include <assert.h>
+#include <compiler.h>
+#include <arch/arm64.h>
+
+typedef uint64_t pte_t;
+
+__BEGIN_CDECLS
+
+#define ARM64_TLBI_NOADDR(op) \
+({ \
+    __asm__ volatile("tlbi " #op::); \
+    ISB; \
+})
+
+#define ARM64_TLBI(op, val) \
+({ \
+    __asm__ volatile("tlbi " #op ", %0" :: "r" (val)); \
+    ISB; \
+})
+
+#define MMU_ARM64_GLOBAL_ASID (~0U)
+int arm64_mmu_map(vaddr_t vaddr, paddr_t paddr, size_t size, pte_t attrs,
+                  vaddr_t vaddr_base, uint top_size_shift,
+                  uint top_index_shift, uint page_size_shift,
+                  pte_t *top_page_table, uint asid);
+int arm64_mmu_unmap(vaddr_t vaddr, size_t size,
+                    vaddr_t vaddr_base, uint top_size_shift,
+                    uint top_index_shift, uint page_size_shift,
+                    pte_t *top_page_table, uint asid);
+
+__END_CDECLS
+#endif /* ASSEMBLY */
+
+#endif
diff --git a/src/bsp/lk/arch/arm64/include/arch/asm_macros.h b/src/bsp/lk/arch/arm64/include/arch/asm_macros.h
new file mode 100644
index 0000000..05543f0
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/include/arch/asm_macros.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2014 Travis Geiselbrecht
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#pragma once
+
+/* push/pop a pair of registers, keeping sp 16-byte aligned per AAPCS64 */
+.macro push ra, rb
+stp \ra, \rb, [sp,#-16]!
+.endm
+
+.macro pop ra, rb
+ldp \ra, \rb, [sp], #16
+.endm
+
+/* tbz/tbnz variants that take a single-bit mask instead of a bit number:
+ * recurse over \shift until the set bit is located, failing at assembly
+ * time if \mask is not a power of two (or exceeds bit 63) */
+.macro tbzmask, reg, mask, label, shift=0
+.if \shift >= 64
+    .error "tbzmask: unsupported mask, \mask"
+.elseif \mask == 1 << \shift
+    tbz     \reg, #\shift, \label
+.else
+    tbzmask \reg, \mask, \label, "(\shift + 1)"
+.endif
+.endm
+
+.macro tbnzmask, reg, mask, label, shift=0
+.if \shift >= 64
+    .error "tbnzmask: unsupported mask, \mask"
+.elseif \mask == 1 << \shift
+    tbnz     \reg, #\shift, \label
+.else
+    tbnzmask \reg, \mask, \label, "(\shift + 1)"
+.endif
+.endm
+
+/* carve (1 << \size_shift) zeroed bytes out of the boot allocator, aligned
+ * to their own size, advancing boot_alloc_end; \phys_offset is subtracted
+ * to translate the result into the caller's address space */
+.macro calloc_bootmem_aligned, new_ptr, new_ptr_end, tmp, size_shift, phys_offset=0
+.if \size_shift < 4
+    .error "calloc_bootmem_aligned: Unsupported size_shift, \size_shift"
+.endif
+
+    /* load boot_alloc_end */
+    adrp    \tmp, boot_alloc_end
+    ldr     \new_ptr, [\tmp, #:lo12:boot_alloc_end]
+
+    /* align up to 1 << \size_shift (add is split in two for shifts > 12
+     * because the immediate no longer fits a single add) */
+.if \size_shift > 12
+    add     \new_ptr, \new_ptr, #(1 << \size_shift)
+    sub     \new_ptr, \new_ptr, #1
+.else
+    add     \new_ptr, \new_ptr, #(1 << \size_shift) - 1
+.endif
+    and     \new_ptr, \new_ptr, #~((1 << \size_shift) - 1)
+
+    /* reserve 1 << \size_shift bytes and store boot_alloc_end */
+    add     \new_ptr_end, \new_ptr, #(1 << \size_shift)
+    str     \new_ptr_end, [\tmp, #:lo12:boot_alloc_end]
+
+    /* translate address */
+    sub     \new_ptr, \new_ptr, \phys_offset
+    sub     \new_ptr_end, \new_ptr_end, \phys_offset
+
+    /* zero the allocation, 16 bytes at a time */
+    mov     \tmp, \new_ptr
+.Lcalloc_bootmem_aligned_clear_loop\@:
+    stp     xzr, xzr, [\tmp], #16
+    cmp     \tmp, \new_ptr_end
+    b.lo    .Lcalloc_bootmem_aligned_clear_loop\@
+.endm
+
+/* Set fault handler for next instruction: records the address of the
+ * instruction immediately following this macro, paired with \handler, in
+ * the .rodata.fault_handler_table section; the macro itself emits no code */
+.macro set_fault_handler, handler
+.Lfault_location\@:
+.pushsection .rodata.fault_handler_table
+    .quad    .Lfault_location\@
+    .quad    \handler
+.popsection
+.endm
diff --git a/src/bsp/lk/arch/arm64/include/arch/defines.h b/src/bsp/lk/arch/arm64/include/arch/defines.h
new file mode 100755
index 0000000..2907825
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/include/arch/defines.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2008 Travis Geiselbrecht
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#pragma once
+
+/* page-size shifts for the translation granules ARMv8 supports */
+#define SHIFT_4K        (12)
+#define SHIFT_16K       (14)
+#define SHIFT_64K       (16)
+
+/* arm specific stuff */
+/* kernel page size is chosen at build time via an ARM64_LARGE_PAGESIZE_*
+ * define; defaults to a 4K granule */
+#ifdef ARM64_LARGE_PAGESIZE_64K
+#define PAGE_SIZE_SHIFT (SHIFT_64K)
+#elif ARM64_LARGE_PAGESIZE_16K
+#define PAGE_SIZE_SHIFT (SHIFT_16K)
+#else
+#define PAGE_SIZE_SHIFT (SHIFT_4K)
+#endif
+/* user mappings always use 4K pages regardless of the kernel granule */
+#define USER_PAGE_SIZE_SHIFT SHIFT_4K
+
+#define PAGE_SIZE (1UL << PAGE_SIZE_SHIFT)
+#define USER_PAGE_SIZE (1UL << USER_PAGE_SIZE_SHIFT)
+
+/* NOTE(review): assumed data cache line size; the architectural value
+ * comes from CTR_EL0 — confirm this matches the target cores */
+#define CACHE_LINE 64
+
diff --git a/src/bsp/lk/arch/arm64/include/arch/spinlock.h b/src/bsp/lk/arch/arm64/include/arch/spinlock.h
new file mode 100644
index 0000000..f063cee
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/include/arch/spinlock.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2014 Travis Geiselbrecht
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#pragma once
+
+#include <arch/ops.h>
+#include <stdbool.h>
+
+#define SPIN_LOCK_INITIAL_VALUE (0)
+
+typedef unsigned long spin_lock_t;
+
+typedef unsigned int spin_lock_saved_state_t;
+typedef unsigned int spin_lock_save_flags_t;
+
+#if WITH_SMP
+void arch_spin_lock(spin_lock_t *lock);
+int arch_spin_trylock(spin_lock_t *lock);
+void arch_spin_unlock(spin_lock_t *lock);
+#else
+/* uniprocessor build: no other cpu can contend, just record the state */
+static inline void arch_spin_lock(spin_lock_t *lock)
+{
+    *lock = 1;
+}
+
+/* always succeeds on UP; returns 0 for success to match the SMP variant.
+ * Marks the lock held so arch_spin_lock_held() agrees with
+ * arch_spin_lock() (the original left *lock untouched here) */
+static inline int arch_spin_trylock(spin_lock_t *lock)
+{
+    *lock = 1;
+    return 0;
+}
+
+static inline void arch_spin_unlock(spin_lock_t *lock)
+{
+    *lock = 0;
+}
+#endif
+
+/* reset a spin lock to the unlocked state */
+static inline void arch_spin_lock_init(spin_lock_t *lock)
+{
+    *lock = SPIN_LOCK_INITIAL_VALUE;
+}
+
+/* true if the lock is currently held (by any cpu) */
+static inline bool arch_spin_lock_held(spin_lock_t *lock)
+{
+    return *lock != 0;
+}
+
+enum {
+    /* Possible future flags:
+     * SPIN_LOCK_FLAG_PMR_MASK         = 0x000000ff,
+     * SPIN_LOCK_FLAG_PREEMPTION       = 0x10000000,
+     * SPIN_LOCK_FLAG_SET_PMR          = 0x20000000,
+     */
+
+    /* ARM specific flags */
+    SPIN_LOCK_FLAG_IRQ              = 0x40000000,
+    SPIN_LOCK_FLAG_FIQ              = 0x80000000, /* Do not use unless IRQs are already disabled */
+    SPIN_LOCK_FLAG_IRQ_FIQ          = SPIN_LOCK_FLAG_IRQ | SPIN_LOCK_FLAG_FIQ,
+
+    /* Generic flags */
+    SPIN_LOCK_FLAG_INTERRUPTS       = SPIN_LOCK_FLAG_IRQ,
+};
+
+    /* default arm flag is to just disable plain irqs */
+#define ARCH_DEFAULT_SPIN_LOCK_FLAG_INTERRUPTS  SPIN_LOCK_FLAG_INTERRUPTS
+
+enum {
+    /* private */
+    SPIN_LOCK_STATE_RESTORE_IRQ = 1,
+    SPIN_LOCK_STATE_RESTORE_FIQ = 2,
+};
+
+/*
+ * Disable the interrupt classes selected by 'flags' (IRQ and/or FIQ) and
+ * record in *statep which were actually enabled beforehand, so that
+ * arch_interrupt_restore() re-enables only those.
+ */
+static inline void
+arch_interrupt_save(spin_lock_saved_state_t *statep, spin_lock_save_flags_t flags)
+{
+    spin_lock_saved_state_t state = 0;
+    if ((flags & SPIN_LOCK_FLAG_IRQ) && !arch_ints_disabled()) {
+        state |= SPIN_LOCK_STATE_RESTORE_IRQ;
+        arch_disable_ints();
+    }
+    if ((flags & SPIN_LOCK_FLAG_FIQ) && !arch_fiqs_disabled()) {
+        state |= SPIN_LOCK_STATE_RESTORE_FIQ;
+        arch_disable_fiqs();
+    }
+    *statep = state;
+}
+
+/*
+ * Re-enable the interrupt classes that arch_interrupt_save() recorded as
+ * previously enabled; 'flags' must match the flags passed to save.
+ */
+static inline void
+arch_interrupt_restore(spin_lock_saved_state_t old_state, spin_lock_save_flags_t flags)
+{
+    if ((flags & SPIN_LOCK_FLAG_FIQ) && (old_state & SPIN_LOCK_STATE_RESTORE_FIQ))
+        arch_enable_fiqs();
+    if ((flags & SPIN_LOCK_FLAG_IRQ) && (old_state & SPIN_LOCK_STATE_RESTORE_IRQ))
+        arch_enable_ints();
+}
+
+
+
diff --git a/src/bsp/lk/arch/arm64/mmu.c b/src/bsp/lk/arch/arm64/mmu.c
new file mode 100644
index 0000000..c723bf8
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/mmu.c
@@ -0,0 +1,491 @@
+/*
+ * Copyright (c) 2014 Google Inc. All rights reserved
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <arch/arm64/mmu.h>
+#include <assert.h>
+#include <debug.h>
+#include <err.h>
+#include <kernel/vm.h>
+#include <lib/heap.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <trace.h>
+
+#define LOCAL_TRACE 0
+
+STATIC_ASSERT(((long)KERNEL_BASE >> MMU_KERNEL_SIZE_SHIFT) == -1);
+STATIC_ASSERT(((long)KERNEL_ASPACE_BASE >> MMU_KERNEL_SIZE_SHIFT) == -1);
+STATIC_ASSERT(MMU_KERNEL_SIZE_SHIFT <= 48);
+STATIC_ASSERT(MMU_KERNEL_SIZE_SHIFT >= 25);
+
+/* the main translation table */
+pte_t arm64_kernel_translation_table[MMU_KERNEL_PAGE_TABLE_ENTRIES_TOP] __ALIGNED(MMU_KERNEL_PAGE_TABLE_ENTRIES_TOP * 8) __SECTION(".bss.prebss.translation_table");
+
+/* convert user level mmu flags to flags that go in L1 descriptors */
+static pte_t mmu_flags_to_pte_attr(uint flags)
+{
+    pte_t attr = MMU_PTE_ATTR_AF;
+
+    switch (flags & ARCH_MMU_FLAG_CACHE_MASK) {
+        case ARCH_MMU_FLAG_CACHED:
+            attr |= MMU_PTE_ATTR_NORMAL_MEMORY | MMU_PTE_ATTR_SH_INNER_SHAREABLE;
+            break;
+        case ARCH_MMU_FLAG_UNCACHED:
+            attr |= MMU_PTE_ATTR_STRONGLY_ORDERED;
+            break;
+        case ARCH_MMU_FLAG_UNCACHED_DEVICE:
+            attr |= MMU_PTE_ATTR_DEVICE;
+            break;
+        default:
+            /* invalid user-supplied flag */
+            DEBUG_ASSERT(1);
+            return ERR_INVALID_ARGS;
+    }
+
+    switch (flags & (ARCH_MMU_FLAG_PERM_USER | ARCH_MMU_FLAG_PERM_RO)) {
+        case 0:
+            attr |= MMU_PTE_ATTR_AP_P_RW_U_NA;
+            break;
+        case ARCH_MMU_FLAG_PERM_RO:
+            attr |= MMU_PTE_ATTR_AP_P_RO_U_NA;
+            break;
+        case ARCH_MMU_FLAG_PERM_USER:
+            attr |= MMU_PTE_ATTR_AP_P_RW_U_RW;
+            break;
+        case ARCH_MMU_FLAG_PERM_USER | ARCH_MMU_FLAG_PERM_RO:
+            attr |= MMU_PTE_ATTR_AP_P_RO_U_RO;
+            break;
+    }
+
+    if (flags & ARCH_MMU_FLAG_PERM_NO_EXECUTE) {
+        attr |= MMU_PTE_ATTR_UXN | MMU_PTE_ATTR_PXN;
+    }
+
+    if (flags & ARCH_MMU_FLAG_NS) {
+            attr |= MMU_PTE_ATTR_NON_SECURE;
+    }
+
+    return attr;
+}
+
+/*
+ * Look up the mapping of a kernel virtual address by walking the kernel
+ * translation table.  On success returns 0 and fills in *paddr and *flags
+ * (either may be NULL).  Returns ERR_INVALID_ARGS for addresses below the
+ * kernel address space and ERR_NOT_FOUND for unmapped addresses.
+ */
+status_t arch_mmu_query(vaddr_t vaddr, paddr_t *paddr, uint *flags)
+{
+    uint index;
+    uint index_shift;
+    pte_t pte;
+    pte_t pte_addr;
+    uint descriptor_type;
+    pte_t *page_table;
+    vaddr_t kernel_base = ~0UL << MMU_KERNEL_SIZE_SHIFT;
+    vaddr_t vaddr_rem;
+
+    if (vaddr < kernel_base) {
+        TRACEF("vaddr 0x%lx < base 0x%lx\n", vaddr, kernel_base);
+        return ERR_INVALID_ARGS;
+    }
+
+    index_shift = MMU_KERNEL_TOP_SHIFT;
+    page_table = arm64_kernel_translation_table;
+
+    vaddr_rem = vaddr - kernel_base;
+    index = vaddr_rem >> index_shift;
+    ASSERT(index < MMU_KERNEL_PAGE_TABLE_ENTRIES_TOP);
+
+    /* walk down the levels until a leaf (block or page) entry is found */
+    while (true) {
+        index = vaddr_rem >> index_shift;
+        vaddr_rem -= (vaddr_t)index << index_shift;
+        pte = page_table[index];
+        descriptor_type = pte & MMU_PTE_DESCRIPTOR_MASK;
+        pte_addr = pte & MMU_PTE_OUTPUT_ADDR_MASK;
+
+        LTRACEF("va 0x%lx, index %d, index_shift %d, rem 0x%lx, pte 0x%llx\n",
+                vaddr, index, index_shift, vaddr_rem, pte);
+
+        if (descriptor_type == MMU_PTE_DESCRIPTOR_INVALID)
+            return ERR_NOT_FOUND;
+
+        /* the same descriptor code means "block" above the last level and
+         * "page" at the last level; both terminate the walk */
+        if (descriptor_type == ((index_shift > MMU_KERNEL_PAGE_SIZE_SHIFT) ?
+                                 MMU_PTE_L012_DESCRIPTOR_BLOCK :
+                                 MMU_PTE_L3_DESCRIPTOR_PAGE)) {
+            break;
+        }
+
+        if (index_shift <= MMU_KERNEL_PAGE_SIZE_SHIFT ||
+            descriptor_type != MMU_PTE_L012_DESCRIPTOR_TABLE) {
+            PANIC_UNIMPLEMENTED;
+        }
+
+        page_table = paddr_to_kvaddr(pte_addr);
+        index_shift -= MMU_KERNEL_PAGE_SIZE_SHIFT - 3;
+    }
+
+    if (paddr)
+        *paddr = pte_addr + vaddr_rem;
+    if (flags) {
+        /* translate the descriptor's attribute bits back to ARCH_MMU_FLAG_* */
+        *flags = 0;
+        if (pte & MMU_PTE_ATTR_NON_SECURE)
+            *flags |= ARCH_MMU_FLAG_NS;
+        switch (pte & MMU_PTE_ATTR_ATTR_INDEX_MASK) {
+            case MMU_PTE_ATTR_STRONGLY_ORDERED:
+                *flags |= ARCH_MMU_FLAG_UNCACHED;
+                break;
+            case MMU_PTE_ATTR_DEVICE:
+                *flags |= ARCH_MMU_FLAG_UNCACHED_DEVICE;
+                break;
+            case MMU_PTE_ATTR_NORMAL_MEMORY:
+                break;
+            default:
+                PANIC_UNIMPLEMENTED;
+        }
+        switch (pte & MMU_PTE_ATTR_AP_MASK) {
+            case MMU_PTE_ATTR_AP_P_RW_U_NA:
+                break;
+            case MMU_PTE_ATTR_AP_P_RW_U_RW:
+                *flags |= ARCH_MMU_FLAG_PERM_USER;
+                break;
+            case MMU_PTE_ATTR_AP_P_RO_U_NA:
+                *flags |= ARCH_MMU_FLAG_PERM_RO;
+                break;
+            case MMU_PTE_ATTR_AP_P_RO_U_RO:
+                *flags |= ARCH_MMU_FLAG_PERM_USER | ARCH_MMU_FLAG_PERM_RO;
+                break;
+        }
+        /* only report no-execute when both user and privileged XN are set */
+        if ((pte & MMU_PTE_ATTR_UXN) && (pte & MMU_PTE_ATTR_PXN)) {
+            *flags |= ARCH_MMU_FLAG_PERM_NO_EXECUTE;
+        }
+    }
+    LTRACEF("va 0x%lx, paddr 0x%lx, flags 0x%x\n",
+            vaddr, paddr ? *paddr : ~0UL, flags ? *flags : ~0U);
+    return 0;
+}
+
+/*
+ * Allocate a (1 << page_size_shift)-byte page table and return its
+ * physical address in *paddrp.  Tables of at least PAGE_SIZE come from
+ * the pmm; smaller ones are heap-allocated with natural alignment.
+ * Returns 0 on success or a negative error code.
+ */
+static int alloc_page_table(paddr_t *paddrp, uint page_size_shift)
+{
+    size_t count;
+    size_t size = 1U << page_size_shift;
+    void *vaddr;
+
+    if (size >= PAGE_SIZE) {
+        count = size / PAGE_SIZE;
+        if (pmm_alloc_contiguous(count, page_size_shift, paddrp, NULL) != count)
+            return ERR_NO_MEMORY;
+    } else {
+        /* keep the status in its own correctly-typed local instead of
+         * laundering a negative status_t through the size_t page count */
+        status_t err;
+
+        vaddr = memalign(size, size);
+        if (!vaddr)
+            return ERR_NO_MEMORY;
+        /* resolve the heap address to its physical page */
+        err = arch_mmu_query((vaddr_t)vaddr, paddrp, NULL);
+        if (err) {
+            free(vaddr);
+            return err;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Release a page table obtained from alloc_page_table().  Tables of at
+ * least PAGE_SIZE go back to the pmm; smaller ones were heap-allocated.
+ * NOTE(review): only a single vm_page is freed here — if page_size_shift
+ * can exceed PAGE_SIZE_SHIFT the tail pages leak; confirm against pmm API.
+ */
+static void free_page_table(void *vaddr, paddr_t paddr, uint page_size_shift)
+{
+    vm_page_t *address_to_page(paddr_t addr); /* TODO: remove */
+
+    size_t size = 1U << page_size_shift;
+    vm_page_t *page;
+
+    if (size >= PAGE_SIZE) {
+        page = address_to_page(paddr);
+        if (!page)
+            panic("bad page table paddr 0x%lx\n", paddr);
+        pmm_free_page(page);
+    } else {
+        free(vaddr);
+    }
+}
+
+/*
+ * Return the kernel virtual address of the next-level page table linked
+ * from page_table[index], allocating and installing a zeroed one when the
+ * slot is empty.  Returns NULL on allocation failure or when the slot
+ * already holds a block mapping.
+ */
+static pte_t *arm64_mmu_get_page_table(vaddr_t index, uint page_size_shift, pte_t *page_table)
+{
+    pte_t pte;
+    paddr_t paddr;
+    void *vaddr;
+    int ret;
+
+    pte = page_table[index];
+    switch (pte & MMU_PTE_DESCRIPTOR_MASK) {
+    case MMU_PTE_DESCRIPTOR_INVALID:
+        ret = alloc_page_table(&paddr, page_size_shift);
+        if (ret) {
+            TRACEF("failed to allocate page table\n");
+            return NULL;
+        }
+        vaddr = paddr_to_kvaddr(paddr);
+        LTRACEF("allocated page table, vaddr %p, paddr 0x%lx\n", vaddr, paddr);
+        memset(vaddr, MMU_PTE_DESCRIPTOR_INVALID, 1U << page_size_shift);
+        /* make the zeroed table visible before it is linked in */
+        __asm__ volatile("dmb ishst" ::: "memory");
+        pte = paddr | MMU_PTE_L012_DESCRIPTOR_TABLE;
+        page_table[index] = pte;
+        LTRACEF("pte %p[0x%lx] = 0x%llx\n", page_table, index, pte);
+        return vaddr;
+
+    case MMU_PTE_L012_DESCRIPTOR_TABLE:
+        paddr = pte & MMU_PTE_OUTPUT_ADDR_MASK;
+        LTRACEF("found page table 0x%lx\n", paddr);
+        return paddr_to_kvaddr(paddr);
+
+    case MMU_PTE_L012_DESCRIPTOR_BLOCK:
+        /* a block mapping already occupies this slot */
+        return NULL;
+
+    default:
+        PANIC_UNIMPLEMENTED;
+    }
+}
+
+/*
+ * Scan a page table and report whether every entry is invalid, i.e. the
+ * table no longer maps anything and may be freed.
+ */
+static bool page_table_is_clear(pte_t *page_table, uint page_size_shift)
+{
+    /* a table holds 2^(page_size_shift - 3) eight-byte descriptors */
+    int num_entries = 1U << (page_size_shift - 3);
+    int idx;
+
+    for (idx = 0; idx < num_entries; idx++) {
+        pte_t entry = page_table[idx];
+
+        if (entry == MMU_PTE_DESCRIPTOR_INVALID)
+            continue;
+
+        LTRACEF("page_table at %p still in use, index %d is 0x%llx\n",
+                page_table, idx, entry);
+        return false;
+    }
+
+    LTRACEF("page table at %p is clear\n", page_table);
+    return true;
+}
+
+/*
+ * Recursively clear the mappings covering [vaddr_rel, vaddr_rel + size)
+ * at one level of the page-table tree, invalidating the TLB for each
+ * removed leaf entry and freeing sub-tables that become empty.
+ * 'vaddr' is the absolute VA, used only for TLB invalidation.
+ */
+static void arm64_mmu_unmap_pt(vaddr_t vaddr, vaddr_t vaddr_rel,
+                               size_t size,
+                               uint index_shift, uint page_size_shift,
+                               pte_t *page_table, uint asid)
+{
+    pte_t *next_page_table;
+    vaddr_t index;
+    size_t chunk_size;
+    vaddr_t vaddr_rem;
+    vaddr_t block_size;
+    vaddr_t block_mask;
+    pte_t pte;
+    paddr_t page_table_paddr;
+
+    LTRACEF("vaddr 0x%lx, vaddr_rel 0x%lx, size 0x%lx, index shift %d, page_size_shift %d, page_table %p\n",
+            vaddr, vaddr_rel, size, index_shift, page_size_shift, page_table);
+
+    while (size) {
+        /* clamp this iteration's chunk to the end of the current entry */
+        block_size = 1UL << index_shift;
+        block_mask = block_size - 1;
+        vaddr_rem = vaddr_rel & block_mask;
+        chunk_size = MIN(size, block_size - vaddr_rem);
+        index = vaddr_rel >> index_shift;
+
+        pte = page_table[index];
+
+        if (index_shift > page_size_shift &&
+            (pte & MMU_PTE_DESCRIPTOR_MASK) == MMU_PTE_L012_DESCRIPTOR_TABLE) {
+            /* descend into the next-level table for this chunk */
+            page_table_paddr = pte & MMU_PTE_OUTPUT_ADDR_MASK;
+            next_page_table = paddr_to_kvaddr(page_table_paddr);
+            arm64_mmu_unmap_pt(vaddr, vaddr_rem, chunk_size,
+                               index_shift - (page_size_shift - 3),
+                               page_size_shift,
+                               next_page_table, asid);
+            if (chunk_size == block_size ||
+                page_table_is_clear(next_page_table, page_size_shift)) {
+                /* the sub-table is fully unmapped: detach and free it */
+                LTRACEF("pte %p[0x%lx] = 0 (was page table)\n", page_table, index);
+                page_table[index] = MMU_PTE_DESCRIPTOR_INVALID;
+                /* order the detach before the table memory is reused */
+                __asm__ volatile("dmb ishst" ::: "memory");
+                free_page_table(next_page_table, page_table_paddr, page_size_shift);
+            }
+        } else if (pte) {
+            /* leaf (page or block) entry: clear it and invalidate the TLB
+             * by VA, globally or per-ASID as appropriate */
+            LTRACEF("pte %p[0x%lx] = 0\n", page_table, index);
+            page_table[index] = MMU_PTE_DESCRIPTOR_INVALID;
+            CF; /* NOTE(review): presumably a barrier/cache-flush macro from
+                 * the arch headers — confirm its definition */
+            if (asid == MMU_ARM64_GLOBAL_ASID)
+                ARM64_TLBI(vaae1is, vaddr >> 12);
+            else
+                ARM64_TLBI(vae1is, vaddr >> 12 | (vaddr_t)asid << 48);
+        } else {
+            LTRACEF("pte %p[0x%lx] already clear\n", page_table, index);
+        }
+        vaddr += chunk_size;
+        vaddr_rel += chunk_size;
+        size -= chunk_size;
+    }
+}
+
+/*
+ * Recursively install mappings for [vaddr_rel_in, vaddr_rel_in + size_in)
+ * at one level of the page-table tree, using block descriptors where
+ * alignment and size allow and descending into (possibly freshly
+ * allocated) sub-tables otherwise.  On failure everything this call has
+ * mapped so far is unmapped again and ERR_GENERIC is returned.
+ */
+static int arm64_mmu_map_pt(vaddr_t vaddr_in, vaddr_t vaddr_rel_in,
+                            paddr_t paddr_in,
+                            size_t size_in, pte_t attrs,
+                            uint index_shift, uint page_size_shift,
+                            pte_t *page_table, uint asid)
+{
+    int ret;
+    pte_t *next_page_table;
+    vaddr_t index;
+    vaddr_t vaddr = vaddr_in;
+    vaddr_t vaddr_rel = vaddr_rel_in;
+    paddr_t paddr = paddr_in;
+    size_t size = size_in;
+    size_t chunk_size;
+    vaddr_t vaddr_rem;
+    vaddr_t block_size;
+    vaddr_t block_mask;
+    pte_t pte;
+
+    LTRACEF("vaddr 0x%lx, vaddr_rel 0x%lx, paddr 0x%lx, size 0x%lx, attrs 0x%llx, index shift %d, page_size_shift %d, page_table %p\n",
+            vaddr, vaddr_rel, paddr, size, attrs,
+            index_shift, page_size_shift, page_table);
+
+    if ((vaddr_rel | paddr | size) & ((1UL << page_size_shift) - 1)) {
+        TRACEF("not page aligned\n");
+        return ERR_INVALID_ARGS;
+    }
+
+    while (size) {
+        /* clamp this iteration's chunk to the end of the current entry */
+        block_size = 1UL << index_shift;
+        block_mask = block_size - 1;
+        vaddr_rem = vaddr_rel & block_mask;
+        chunk_size = MIN(size, block_size - vaddr_rem);
+        index = vaddr_rel >> index_shift;
+
+        /* descend when a block mapping is impossible at this level:
+         * misaligned va/pa, chunk smaller than the block, or the level
+         * does not permit block descriptors */
+        if (((vaddr_rel | paddr) & block_mask) ||
+            (chunk_size != block_size) ||
+            (index_shift > MMU_PTE_DESCRIPTOR_BLOCK_MAX_SHIFT)) {
+            next_page_table = arm64_mmu_get_page_table(index, page_size_shift,
+                                                       page_table);
+            if (!next_page_table)
+                goto err;
+
+            ret = arm64_mmu_map_pt(vaddr, vaddr_rem, paddr, chunk_size, attrs,
+                                   index_shift - (page_size_shift - 3),
+                                   page_size_shift, next_page_table, asid);
+            if (ret)
+                goto err;
+        } else {
+            pte = page_table[index];
+            if (pte) {
+                TRACEF("page table entry already in use, index 0x%lx, 0x%llx\n",
+                       index, pte);
+                goto err;
+            }
+
+            pte = paddr | attrs;
+            if (index_shift > page_size_shift)
+                pte |= MMU_PTE_L012_DESCRIPTOR_BLOCK;
+            else
+                pte |= MMU_PTE_L3_DESCRIPTOR_PAGE;
+
+            LTRACEF("pte %p[0x%lx] = 0x%llx\n", page_table, index, pte);
+            page_table[index] = pte;
+        }
+        vaddr += chunk_size;
+        vaddr_rel += chunk_size;
+        paddr += chunk_size;
+        size -= chunk_size;
+    }
+
+    return 0;
+
+err:
+    /* roll back whatever was mapped before the failure */
+    arm64_mmu_unmap_pt(vaddr_in, vaddr_rel_in, size_in - size,
+                       index_shift, page_size_shift, page_table, asid);
+    DSB;
+    return ERR_GENERIC;
+}
+
+/*
+ * Map [vaddr, vaddr + size) to [paddr, ...) with the given PTE attribute
+ * bits in the address space described by the remaining parameters.
+ * Returns 0 on success; on failure any partial mapping is rolled back.
+ */
+int arm64_mmu_map(vaddr_t vaddr, paddr_t paddr, size_t size, pte_t attrs,
+                  vaddr_t vaddr_base, uint top_size_shift,
+                  uint top_index_shift, uint page_size_shift,
+                  pte_t *top_page_table, uint asid)
+{
+    int ret;
+    vaddr_t vaddr_rel = vaddr - vaddr_base;
+    vaddr_t vaddr_rel_max = 1UL << top_size_shift;
+
+    LTRACEF("vaddr 0x%lx, paddr 0x%lx, size 0x%lx, attrs 0x%llx, asid 0x%x\n",
+            vaddr, paddr, size, attrs, asid);
+
+    /* range check; a vaddr below vaddr_base wraps and is caught here too */
+    if (vaddr_rel > vaddr_rel_max - size || size > vaddr_rel_max) {
+        TRACEF("vaddr 0x%lx, size 0x%lx out of range vaddr 0x%lx, size 0x%lx\n",
+               vaddr, size, vaddr_base, vaddr_rel_max);
+        return ERR_INVALID_ARGS;
+    }
+
+    if (!top_page_table) {
+        TRACEF("page table is NULL\n");
+        return ERR_INVALID_ARGS;
+    }
+
+    ret = arm64_mmu_map_pt(vaddr, vaddr_rel, paddr, size, attrs,
+                           top_index_shift, page_size_shift, top_page_table, asid);
+    DSB; /* ensure the table updates complete before returning */
+    return ret;
+}
+
+/*
+ * Remove the mappings covering [vaddr, vaddr + size) from the address
+ * space described by the remaining parameters.  Returns 0 on success or
+ * ERR_INVALID_ARGS for out-of-range requests.
+ */
+int arm64_mmu_unmap(vaddr_t vaddr, size_t size,
+                    vaddr_t vaddr_base, uint top_size_shift,
+                    uint top_index_shift, uint page_size_shift,
+                    pte_t *top_page_table, uint asid)
+{
+    vaddr_t vaddr_rel = vaddr - vaddr_base;
+    vaddr_t vaddr_rel_max = 1UL << top_size_shift;
+
+    LTRACEF("vaddr 0x%lx, size 0x%lx, asid 0x%x\n", vaddr, size, asid);
+
+    /* range check; a vaddr below vaddr_base wraps and is caught here too */
+    if (vaddr_rel > vaddr_rel_max - size || size > vaddr_rel_max) {
+        TRACEF("vaddr 0x%lx, size 0x%lx out of range vaddr 0x%lx, size 0x%lx\n",
+               vaddr, size, vaddr_base, vaddr_rel_max);
+        return ERR_INVALID_ARGS;
+    }
+
+    if (!top_page_table) {
+        TRACEF("page table is NULL\n");
+        return ERR_INVALID_ARGS;
+    }
+
+    arm64_mmu_unmap_pt(vaddr, vaddr_rel, size,
+                       top_index_shift, page_size_shift, top_page_table, asid);
+    DSB; /* ensure the table updates complete before returning */
+    return 0;
+}
+
+/*
+ * Map 'count' kernel pages starting at vaddr to paddr with generic
+ * ARCH_MMU_FLAG_* flags.  Returns 0 on success or a negative error code.
+ */
+int arch_mmu_map(vaddr_t vaddr, paddr_t paddr, uint count, uint flags)
+{
+    /* validate the flags up front: mmu_flags_to_pte_attr() encodes
+     * ERR_INVALID_ARGS into its pte_t return value for an unknown cache
+     * policy, which must not end up in the page tables as attribute bits */
+    pte_t attrs = mmu_flags_to_pte_attr(flags);
+    if (attrs == (pte_t)ERR_INVALID_ARGS)
+        return ERR_INVALID_ARGS;
+
+    return arm64_mmu_map(vaddr, paddr, count * PAGE_SIZE,
+                         attrs,
+                         ~0UL << MMU_KERNEL_SIZE_SHIFT, MMU_KERNEL_SIZE_SHIFT,
+                         MMU_KERNEL_TOP_SHIFT, MMU_KERNEL_PAGE_SIZE_SHIFT,
+                         arm64_kernel_translation_table, MMU_ARM64_GLOBAL_ASID);
+}
+
+/*
+ * Unmap 'count' kernel pages starting at vaddr.  Returns 0 on success or
+ * a negative error code.
+ */
+int arch_mmu_unmap(vaddr_t vaddr, uint count)
+{
+    return arm64_mmu_unmap(vaddr, count * PAGE_SIZE,
+                           ~0UL << MMU_KERNEL_SIZE_SHIFT, MMU_KERNEL_SIZE_SHIFT,
+                           MMU_KERNEL_TOP_SHIFT, MMU_KERNEL_PAGE_SIZE_SHIFT,
+                           arm64_kernel_translation_table,
+                           MMU_ARM64_GLOBAL_ASID);
+}
diff --git a/src/bsp/lk/arch/arm64/mp.c b/src/bsp/lk/arch/arm64/mp.c
new file mode 100644
index 0000000..c3cfcea
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/mp.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2014 Travis Geiselbrecht
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <arch/mp.h>
+
+#include <assert.h>
+#include <compiler.h>
+#include <trace.h>
+#include <err.h>
+#include <platform/interrupts.h>
+#include <arch/ops.h>
+
+#if WITH_DEV_INTERRUPT_ARM_GIC
+#include <dev/interrupt/arm_gic.h>
+//#else
+//#error need other implementation of interrupt controller that can ipi
+#endif
+
+#define LOCAL_TRACE 0
+
+#define GIC_IPI_BASE (14)
+
+/*
+ * Send inter-processor interrupt 'ipi' to the cpus in the 'target' mask.
+ * IPIs are delivered as GIC software-generated interrupts numbered from
+ * GIC_IPI_BASE.  Declared weak so a platform can override the mechanism.
+ */
+__WEAK status_t arch_mp_send_ipi(mp_cpu_mask_t target, mp_ipi_t ipi)
+{
+    LTRACEF("target 0x%x, ipi %u\n", target, ipi);
+
+#if WITH_DEV_INTERRUPT_ARM_GIC
+    uint gic_ipi_num = ipi + GIC_IPI_BASE;
+
+    /* filter out targets outside of the range of cpus we care about */
+    target &= ((1UL << SMP_MAX_CPUS) - 1);
+    if (target != 0) {
+        LTRACEF("target 0x%x, gic_ipi %u\n", target, gic_ipi_num);
+        arm_gic_sgi(gic_ipi_num, ARM_GIC_SGI_FLAG_NS, target);
+    }
+#endif
+
+    return NO_ERROR;
+}
+
+/* interrupt handler for MP_IPI_GENERIC; currently only traces the event */
+enum handler_return arm_ipi_generic_handler(void *arg)
+{
+    LTRACEF("cpu %u, arg %p\n", arch_curr_cpu_num(), arg);
+
+    return INT_NO_RESCHEDULE;
+}
+
+/* interrupt handler for MP_IPI_RESCHEDULE; defers to the mp reschedule
+ * mailbox to decide whether a reschedule is needed on this cpu */
+enum handler_return arm_ipi_reschedule_handler(void *arg)
+{
+    LTRACEF("cpu %u, arg %p\n", arch_curr_cpu_num(), arg);
+
+    return mp_mbx_reschedule_irq();
+}
+
+/* per-cpu MP init: wire the IPI vectors to their handlers.
+ * NOTE(review): the unmask_interrupt() calls are commented out — confirm
+ * the SGIs are unmasked elsewhere, otherwise IPIs never fire */
+__WEAK void arch_mp_init_percpu(void)
+{
+    register_int_handler(MP_IPI_GENERIC + GIC_IPI_BASE, &arm_ipi_generic_handler, 0);
+    register_int_handler(MP_IPI_RESCHEDULE + GIC_IPI_BASE, &arm_ipi_reschedule_handler, 0);
+
+    //unmask_interrupt(MP_IPI_GENERIC);
+    //unmask_interrupt(MP_IPI_RESCHEDULE);
+}
+
diff --git a/src/bsp/lk/arch/arm64/rules.mk b/src/bsp/lk/arch/arm64/rules.mk
new file mode 100644
index 0000000..ab6ac75
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/rules.mk
@@ -0,0 +1,125 @@
+LOCAL_DIR := $(GET_LOCAL_DIR)
+
+MODULE := $(LOCAL_DIR)
+
+GLOBAL_DEFINES += \
+	ARM64_CPU_$(ARM_CPU)=1 \
+	ARM_ISA_ARMV8=1 \
+	IS_64BIT=1
+
+MODULE_SRCS += \
+	$(LOCAL_DIR)/arch.c \
+	$(LOCAL_DIR)/asm.S \
+	$(LOCAL_DIR)/exceptions.S \
+	$(LOCAL_DIR)/exceptions_el2_el3.S \
+	$(LOCAL_DIR)/exceptions_c.c \
+	$(LOCAL_DIR)/fpu.c \
+	$(LOCAL_DIR)/thread.c \
+	$(LOCAL_DIR)/spinlock.S \
+	$(LOCAL_DIR)/start.S \
+	$(LOCAL_DIR)/cache-ops.S \
+
+#	$(LOCAL_DIR)/arm/start.S \
+	$(LOCAL_DIR)/arm/cache.c \
+	$(LOCAL_DIR)/arm/ops.S \
+	$(LOCAL_DIR)/arm/faults.c \
+	$(LOCAL_DIR)/arm/dcc.S
+
+GLOBAL_DEFINES += \
+	ARCH_DEFAULT_STACK_SIZE=8192
+
+# if its requested we build with SMP, arm generically supports 4 cpus
+ifeq ($(WITH_SMP),1)
+SMP_MAX_CPUS ?= 4
+SMP_CPU_CLUSTER_SHIFT ?= 8
+SMP_CPU_ID_BITS ?= 24 # Ignore aff3 bits for now since they are not next to aff2
+
+GLOBAL_DEFINES += \
+    WITH_SMP=1 \
+    SMP_MAX_CPUS=$(SMP_MAX_CPUS) \
+    SMP_CPU_CLUSTER_SHIFT=$(SMP_CPU_CLUSTER_SHIFT) \
+    SMP_CPU_ID_BITS=$(SMP_CPU_ID_BITS)
+
+MODULE_SRCS += \
+    $(LOCAL_DIR)/mp.c
+else
+GLOBAL_DEFINES += \
+    SMP_MAX_CPUS=1
+endif
+
+ARCH_OPTFLAGS := -O2
+
+# we have a mmu and want the vmm/pmm
+WITH_KERNEL_VM ?= 1
+
+ifeq ($(WITH_KERNEL_VM),1)
+
+MODULE_SRCS += \
+	$(LOCAL_DIR)/mmu.c
+
+KERNEL_ASPACE_BASE ?= 0xffff000000000000
+KERNEL_ASPACE_SIZE ?= 0x0001000000000000
+USER_ASPACE_BASE   ?= 0x0000000001000000
+USER_ASPACE_SIZE   ?= 0x0000fffffe000000
+
+GLOBAL_DEFINES += \
+    KERNEL_ASPACE_BASE=$(KERNEL_ASPACE_BASE) \
+    KERNEL_ASPACE_SIZE=$(KERNEL_ASPACE_SIZE) \
+    USER_ASPACE_BASE=$(USER_ASPACE_BASE) \
+    USER_ASPACE_SIZE=$(USER_ASPACE_SIZE)
+
+KERNEL_BASE ?= $(KERNEL_ASPACE_BASE)
+KERNEL_LOAD_OFFSET ?= 0
+
+GLOBAL_DEFINES += \
+    KERNEL_BASE=$(KERNEL_BASE) \
+    KERNEL_LOAD_OFFSET=$(KERNEL_LOAD_OFFSET)
+
+else
+
+KERNEL_BASE ?= $(MEMBASE)
+KERNEL_LOAD_OFFSET ?= 0
+
+endif
+
+GLOBAL_DEFINES += \
+	MEMBASE=$(MEMBASE) \
+	MEMSIZE=$(MEMSIZE)
+
+# try to find the toolchain
+include $(LOCAL_DIR)/toolchain.mk
+TOOLCHAIN_PREFIX := $(ARCH_$(ARCH)_TOOLCHAIN_PREFIX)
+$(info TOOLCHAIN_PREFIX = $(TOOLCHAIN_PREFIX))
+
+ARCH_COMPILEFLAGS += $(ARCH_$(ARCH)_COMPILEFLAGS)
+
+GLOBAL_LDFLAGS += -z max-page-size=4096
+
+
+# make sure some bits were set up
+MEMVARS_SET := 0
+ifneq ($(MEMBASE),)
+MEMVARS_SET := 1
+endif
+ifneq ($(MEMSIZE),)
+MEMVARS_SET := 1
+endif
+ifeq ($(MEMVARS_SET),0)
+$(error missing MEMBASE or MEMSIZE variable, please set in target rules.mk)
+endif
+
+# potentially generated files that should be cleaned out with clean make rule
+GENERATED += \
+	$(BUILDDIR)/system-onesegment.ld
+
+# rules for generating the linker script
+$(BUILDDIR)/system-onesegment.ld: $(LOCAL_DIR)/system-onesegment.ld $(wildcard arch/*.ld) linkerscript.phony
+	@echo generating $@
+	@$(MKDIR)
+	$(NOECHO)sed "s/%MEMBASE%/$(MEMBASE)/;s/%MEMSIZE%/$(MEMSIZE)/;s/%KERNEL_BASE%/$(KERNEL_BASE)/;s/%KERNEL_LOAD_OFFSET%/$(KERNEL_LOAD_OFFSET)/" < $< > $@.tmp
+	@$(call TESTANDREPLACEFILE,$@.tmp,$@)
+
+linkerscript.phony:
+.PHONY: linkerscript.phony
+
+include make/module.mk
diff --git a/src/bsp/lk/arch/arm64/spinlock.S b/src/bsp/lk/arch/arm64/spinlock.S
new file mode 100644
index 0000000..ef5b3d1
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/spinlock.S
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2014 Google Inc. All rights reserved
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <asm.h>
+
+.text
+
+FUNCTION(arch_spin_trylock)
+	mov	x2, x0
+	mov	x1, #1
+	ldaxr	x0, [x2]
+	cbnz	x0, 1f
+	stxr	w0, x1, [x2]
+1:
+	ret
+
+FUNCTION(arch_spin_lock)
+	mov	x1, #1
+	sevl
+1:
+	wfe
+	ldaxr	x2, [x0]
+	cbnz	x2, 1b
+	stxr	w2, x1, [x0]
+	cbnz	w2, 1b
+	ret
+
+FUNCTION(arch_spin_unlock)
+	stlr	xzr, [x0]
+	ret
diff --git a/src/bsp/lk/arch/arm64/start.S b/src/bsp/lk/arch/arm64/start.S
new file mode 100644
index 0000000..f36f5bf
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/start.S
@@ -0,0 +1,440 @@
+#include <asm.h>
+#include <arch/arm64/mmu.h>
+#include <arch/asm_macros.h>
+#include <arch/arch_ops.h>
+#include <kernel/vm.h>
+
+/*
+ * Register use:
+ *  x0-x3   Arguments
+ *  x9-x15  Scratch
+ *  x19-x28 Globals
+ */
+tmp                     .req x9
+tmp2                    .req x10
+wtmp2                   .req w10
+idx                     .req x11
+idx_shift               .req x12
+page_table              .req x13
+new_page_table          .req x14
+phys_offset             .req x15
+
+cpuid                   .req x19
+page_table0             .req x20
+page_table1             .req x21
+mmu_initial_mapping     .req x22
+vaddr                   .req x23
+paddr                   .req x24
+mapping_size            .req x25
+size                    .req x26
+attr                    .req x27
+
+.section .text.boot
+FUNCTION(_start)
+.globl arm_reset
+arm_reset:
+
+    bl      setup_el2_or_el3_exception_base
+
+    mrs     tmp, CurrentEL
+    cmp     tmp, #(0b11 << 2)
+    b.ne    .Lsetup_el2_or_el3_stack
+
+    /* el3 set secure timer */
+    ldr     tmp2, =13000000
+    msr     cntfrq_el0, tmp2
+
+    /* el3 enable smp bit */
+    mrs     tmp2, s3_1_c15_c2_1
+    orr     tmp2, tmp2, #(1<<6)
+    msr     s3_1_c15_c2_1, tmp2
+
+.Lsetup_el2_or_el3_stack:
+    /* set el2 or el3 stack pointer */
+    ldr     tmp2, = __stack_end
+    mov     sp, tmp2
+
+    /* initialization required in EL3. weak symbol at asm.S */
+    cmp     tmp, #(0b11 << 2)
+    b.ne    .LelX_to_el1
+    bl      platform_el3_init
+
+.LelX_to_el1:
+    /* change to el1 */
+    bl      arm64_elX_to_el1
+
+#if WITH_KERNEL_VM
+    /* enable caches so atomics and spinlocks work */
+    mrs     tmp, sctlr_el1
+    orr     tmp, tmp, #(1<<12) /* Enable icache */
+    orr     tmp, tmp, #(1<<2)  /* Enable dcache/ucache */
+    bic     tmp, tmp, #(1<<3)  /* Disable Stack Alignment Check */ /* TODO: don't use unaligned stacks */
+    msr     sctlr_el1, tmp
+
+    /* set up the mmu according to mmu_initial_mappings */
+
+    /* load the base of the translation table and clear the table */
+    adrp    page_table1, arm64_kernel_translation_table
+    add     page_table1, page_table1, #:lo12:arm64_kernel_translation_table
+
+    /* Prepare tt_trampoline page table */
+    /* Calculate pagetable physical addresses */
+    adrp    page_table0, tt_trampoline
+    add     page_table0, page_table0, #:lo12:tt_trampoline
+
+#if WITH_SMP
+    mrs     cpuid, mpidr_el1
+    ubfx    cpuid, cpuid, #0, #SMP_CPU_ID_BITS
+    cbnz    cpuid, .Lmmu_enable_secondary
+#endif
+
+    mov     tmp, #0
+
+    /* walk through all the entries in the translation table, setting them up */
+.Lclear_top_page_table_loop:
+    str     xzr, [page_table1, tmp, lsl #3]
+    add     tmp, tmp, #1
+    cmp     tmp, #MMU_KERNEL_PAGE_TABLE_ENTRIES_TOP
+    bne     .Lclear_top_page_table_loop
+
+    /* load the address of the mmu_initial_mappings table and start processing */
+    adrp    mmu_initial_mapping, mmu_initial_mappings
+    add     mmu_initial_mapping, mmu_initial_mapping, #:lo12:mmu_initial_mappings
+
+.Linitial_mapping_loop:
+/* Read entry of mmu_initial_mappings (likely defined in platform.c) */
+    ldp     paddr, vaddr, [mmu_initial_mapping, #__MMU_INITIAL_MAPPING_PHYS_OFFSET]
+    ldp     size, tmp, [mmu_initial_mapping, #__MMU_INITIAL_MAPPING_SIZE_OFFSET]
+
+    tbzmask tmp, MMU_INITIAL_MAPPING_FLAG_DYNAMIC, .Lnot_dynamic
+    adr     paddr, _start
+    mov     size, x0
+    str     paddr, [mmu_initial_mapping, #__MMU_INITIAL_MAPPING_PHYS_OFFSET]
+    str     size, [mmu_initial_mapping, #__MMU_INITIAL_MAPPING_SIZE_OFFSET]
+
+.Lnot_dynamic:
+    /* if size == 0, end of list, done with initial mapping */
+    cbz     size, .Linitial_mapping_done
+    mov     mapping_size, size
+
+    /* set up the flags */
+    tbzmask tmp, MMU_INITIAL_MAPPING_FLAG_UNCACHED, .Lnot_uncached
+    ldr     attr, =MMU_INITIAL_MAP_STRONGLY_ORDERED
+    b       .Lmem_type_done
+
+.Lnot_uncached:
+    /* is this memory mapped to device/peripherals? */
+    tbzmask tmp, MMU_INITIAL_MAPPING_FLAG_DEVICE, .Lnot_device
+    ldr     attr, =MMU_INITIAL_MAP_DEVICE
+    b       .Lmem_type_done
+.Lnot_device:
+
+/* Determine the segment in which the memory resides and set appropriate
+ *  attributes.  In order to handle offset kernels, the following rules are
+ *  implemented below:
+ *      KERNEL_BASE    to __code_start             -read/write (see note below)
+ *      __code_start   to __rodata_start (.text)   -read only
+ *      __rodata_start to __data_start   (.rodata) -read only, execute never
+ *      __data_start   to .....          (.data)   -read/write
+ *
+ *  The space below __code_start is presently left as read/write (same as .data)
+ *   mainly as a workaround for the raspberry pi boot process.  Boot vectors for
+ *   secondary CPUs are in this area and need to be updated by cpu0 once the system
+ *   is ready to boot the secondary processors.
+ *   TODO: handle this via mmu_initial_mapping entries, which may need to be
+ *         extended with additional flag types
+ */
+.Lmapping_size_loop:
+    ldr     attr, =MMU_PTE_KERNEL_DATA_FLAGS
+    ldr     tmp, =__code_start
+    subs    size, tmp, vaddr
+    /* If the page is below the entry point (_start), mark it as kernel data */
+    b.hi    .Lmem_type_done
+
+    ldr     attr, =MMU_PTE_KERNEL_RO_FLAGS
+    ldr     tmp, =__rodata_start
+    subs    size, tmp, vaddr
+    b.hi    .Lmem_type_done
+    orr     attr, attr, #MMU_PTE_ATTR_PXN
+    ldr     tmp, =__data_start
+    subs    size, tmp, vaddr
+    b.hi    .Lmem_type_done
+    ldr     attr, =MMU_PTE_KERNEL_DATA_FLAGS
+    ldr     tmp, =_end
+    subs    size, tmp, vaddr
+    b.lo    . /* Error: _end < vaddr */
+    cmp     mapping_size, size
+    b.lo    . /* Error: mapping_size < size => RAM size too small for data/bss */
+    mov     size, mapping_size
+
+.Lmem_type_done:
+    subs    mapping_size, mapping_size, size
+    b.lo    . /* Error: mapping_size < size (RAM size too small for code/rodata?) */
+
+    /* Check that paddr, vaddr and size are page aligned */
+    orr     tmp, vaddr, paddr
+    orr     tmp, tmp, size
+    tst     tmp, #(1 << MMU_KERNEL_PAGE_SIZE_SHIFT) - 1
+    bne     . /* Error: not page aligned */
+
+    /* Clear top bits of virtual address (should be all set) */
+    eor     vaddr, vaddr, #(~0 << MMU_KERNEL_SIZE_SHIFT)
+
+    /* Check that top bits were all set */
+    tst     vaddr, #(~0 << MMU_KERNEL_SIZE_SHIFT)
+    bne     . /* Error: vaddr out of range */
+
+.Lmap_range_top_loop:
+    /* Select top level page table */
+    mov     page_table, page_table1
+    mov     idx_shift, #MMU_KERNEL_TOP_SHIFT
+
+    lsr     idx, vaddr, idx_shift
+
+
+/* determine the type of page table entry to use given alignment and size
+ *  of the chunk of memory we are mapping
+ */
+.Lmap_range_one_table_loop:
+    /* Check if the current level allows block descriptors */
+    cmp     idx_shift, #MMU_PTE_DESCRIPTOR_BLOCK_MAX_SHIFT
+    b.hi    .Lmap_range_need_page_table
+
+    /* Check if paddr and vaddr alignment allows a block descriptor */
+    orr     tmp2, vaddr, paddr
+    lsr     tmp, tmp2, idx_shift
+    lsl     tmp, tmp, idx_shift
+    cmp     tmp, tmp2
+    b.ne    .Lmap_range_need_page_table
+
+    /* Check if size is large enough for a block mapping */
+    lsr     tmp, size, idx_shift
+    cbz     tmp, .Lmap_range_need_page_table
+
+    /* Select descriptor type, page for level 3, block for level 0-2 */
+    orr     tmp, attr, #MMU_PTE_L3_DESCRIPTOR_PAGE
+    cmp     idx_shift, MMU_KERNEL_PAGE_SIZE_SHIFT
+    beq     .Lmap_range_l3
+    orr     tmp, attr, #MMU_PTE_L012_DESCRIPTOR_BLOCK
+.Lmap_range_l3:
+
+    /* Write page table entry */
+    orr     tmp, tmp, paddr
+    str     tmp, [page_table, idx, lsl #3]
+
+    /* Move to next page table entry */
+    mov     tmp, #1
+    lsl     tmp, tmp, idx_shift
+    add     vaddr, vaddr, tmp
+    add     paddr, paddr, tmp
+    subs    size, size, tmp
+    /* TODO: add local loop if next entry is in the same page table */
+    b.ne    .Lmap_range_top_loop /* size != 0 */
+
+    /* Restore top bits of virtual address (should be all set) */
+    eor     vaddr, vaddr, #(~0 << MMU_KERNEL_SIZE_SHIFT)
+    /* Move to next subtype of ram mmu_initial_mappings entry */
+    cbnz     mapping_size, .Lmapping_size_loop
+
+    /* Move to next mmu_initial_mappings entry */
+    add     mmu_initial_mapping, mmu_initial_mapping, __MMU_INITIAL_MAPPING_SIZE
+    b       .Linitial_mapping_loop
+
+.Lmap_range_need_page_table:
+    /* Check if page table entry is unused */
+    ldr     new_page_table, [page_table, idx, lsl #3]
+    cbnz    new_page_table, .Lmap_range_has_page_table
+
+    /* Calculate phys offset (needed for memory allocation) */
+.Lphys_offset:
+    adr     phys_offset, .Lphys_offset /* phys */
+    ldr     tmp, =.Lphys_offset /* virt */
+    sub     phys_offset, tmp, phys_offset
+
+    /* Allocate new page table */
+    calloc_bootmem_aligned new_page_table, tmp, tmp2, MMU_KERNEL_PAGE_SIZE_SHIFT, phys_offset
+
+    /* Write page table entry (with allocated page table) */
+    orr     new_page_table, new_page_table, #MMU_PTE_L012_DESCRIPTOR_TABLE
+    str     new_page_table, [page_table, idx, lsl #3]
+
+.Lmap_range_has_page_table:
+    /* Check descriptor type */
+    and     tmp, new_page_table, #MMU_PTE_DESCRIPTOR_MASK
+    cmp     tmp, #MMU_PTE_L012_DESCRIPTOR_TABLE
+    b.ne    . /* Error: entry already in use (as a block entry) */
+
+    /* switch to next page table level */
+    bic     page_table, new_page_table, #MMU_PTE_DESCRIPTOR_MASK
+    mov     tmp, #~0
+    lsl     tmp, tmp, idx_shift
+    bic     tmp, vaddr, tmp
+    sub     idx_shift, idx_shift, #(MMU_KERNEL_PAGE_SIZE_SHIFT - 3)
+    lsr     idx, tmp, idx_shift
+
+    b       .Lmap_range_one_table_loop
+
+.Linitial_mapping_done:
+
+    /* Prepare tt_trampoline page table */
+
+    /* Zero tt_trampoline translation tables */
+    mov     tmp, #0
+.Lclear_tt_trampoline:
+    str     xzr, [page_table0, tmp, lsl#3]
+    add     tmp, tmp, #1
+    cmp     tmp, #MMU_PAGE_TABLE_ENTRIES_IDENT
+    blt     .Lclear_tt_trampoline
+
+    /* Setup mapping at phys -> phys */
+    adr     tmp, .Lmmu_on_pc
+    lsr     tmp, tmp, #MMU_IDENT_TOP_SHIFT    /* tmp = paddr index */
+    ldr     tmp2, =MMU_PTE_IDENT_FLAGS
+    add     tmp2, tmp2, tmp, lsl #MMU_IDENT_TOP_SHIFT  /* tmp2 = pt entry */
+
+    str     tmp2, [page_table0, tmp, lsl #3]     /* tt_trampoline[paddr index] = pt entry */
+
+#if WITH_SMP
+    adrp    tmp, page_tables_not_ready
+    add     tmp, tmp, #:lo12:page_tables_not_ready
+    str     wzr, [tmp]
+    b       .Lpage_tables_ready
+
+.Lmmu_enable_secondary:
+    adrp    tmp, page_tables_not_ready
+    add     tmp, tmp, #:lo12:page_tables_not_ready
+.Lpage_tables_not_ready:
+    ldr     wtmp2, [tmp]
+    cbnz    wtmp2, .Lpage_tables_not_ready
+.Lpage_tables_ready:
+#endif
+
+    /* set up the mmu */
+
+    /* Invalidate TLB */
+    tlbi    vmalle1is
+    isb
+    dsb     sy
+
+    /* Initialize Memory Attribute Indirection Register */
+    ldr     tmp, =MMU_MAIR_VAL
+    msr     mair_el1, tmp
+
+    /* Initialize TCR_EL1 */
+    /* set cacheable attributes on translation walk */
+    /* (SMP extensions) non-shareable, inner write-back write-allocate */
+    ldr     tmp, =MMU_TCR_FLAGS_IDENT
+    msr     tcr_el1, tmp
+
+    isb
+
+    /* Write ttbr with phys addr of the translation table */
+    msr     ttbr0_el1, page_table0
+    msr     ttbr1_el1, page_table1
+    isb
+
+    /* Read SCTLR */
+    mrs     tmp, sctlr_el1
+
+    /* Turn on the MMU */
+    orr     tmp, tmp, #0x1
+
+    /* Write back SCTLR */
+    msr     sctlr_el1, tmp
+.Lmmu_on_pc:
+    isb
+
+    /* Jump to virtual code address */
+    ldr     tmp, =.Lmmu_on_vaddr
+    br      tmp
+
+.Lmmu_on_vaddr:
+
+    /* Disable trampoline page-table in ttbr0 */
+    ldr     tmp, =MMU_TCR_FLAGS_KERNEL
+    msr     tcr_el1, tmp
+    isb
+
+
+    /* Invalidate TLB */
+    tlbi    vmalle1
+    isb
+
+#if WITH_SMP
+    cbnz    cpuid, .Lsecondary_boot
+#endif
+#endif /* WITH_KERNEL_VM */
+
+    ldr tmp, =__stack_end
+    mov sp, tmp
+
+    /* clear bss */
+.L__do_bss:
+    /* clear out the bss excluding the stack and kernel translation table */
+    /* NOTE: relies on __post_prebss_bss_start and __bss_end being 8 byte aligned */
+    ldr     tmp, =__post_prebss_bss_start
+    ldr     tmp2, =__bss_end
+    sub     tmp2, tmp2, tmp
+    cbz     tmp2, .L__bss_loop_done
+.L__bss_loop:
+    sub     tmp2, tmp2, #8
+    str     xzr, [tmp], #8
+    cbnz    tmp2, .L__bss_loop
+.L__bss_loop_done:
+
+    bl  lk_main
+    b   .
+
+#if WITH_SMP
+.Lsecondary_boot:
+    and     tmp, cpuid, #0xff
+    cmp     tmp, #(1 << SMP_CPU_CLUSTER_SHIFT)
+    bge     .Lunsupported_cpu_trap
+    bic     cpuid, cpuid, #0xff
+    orr     cpuid, tmp, cpuid, LSR #(8 - SMP_CPU_CLUSTER_SHIFT)
+    adrp    tmp, linear_cpuid_map
+    add     tmp, tmp, #:lo12:linear_cpuid_map
+    ldr     tmp, [tmp]
+    cbz     tmp, .Lno_cpuid_remap
+    add     tmp, tmp, cpuid
+    ldrb    wtmp2, [tmp]
+    ubfx    cpuid, tmp2, #0, #31
+
+.Lno_cpuid_remap:
+    cmp     cpuid, #SMP_MAX_CPUS
+    bge     .Lunsupported_cpu_trap
+
+    /* Set up the stack */
+    ldr     tmp, =__stack_end
+    mov     tmp2, #ARCH_DEFAULT_STACK_SIZE
+    mul     tmp2, tmp2, cpuid
+    sub     sp, tmp, tmp2
+
+    mov     x0, cpuid
+    bl      arm64_secondary_entry
+
+.Lunsupported_cpu_trap:
+    wfe
+    b       .Lunsupported_cpu_trap
+#endif
+
+.ltorg
+
+#if WITH_SMP
+.data
+DATA(page_tables_not_ready)
+    .long       1
+#endif
+
+.section .bss.prebss.stack
+    .align 4
+DATA(__stack)
+    .skip ARCH_DEFAULT_STACK_SIZE * SMP_MAX_CPUS
+DATA(__stack_end)
+
+#if WITH_KERNEL_VM
+.section ".bss.prebss.translation_table"
+.align 3 + MMU_PAGE_TABLE_ENTRIES_IDENT_SHIFT
+DATA(tt_trampoline)
+    .skip 8 * MMU_PAGE_TABLE_ENTRIES_IDENT
+#endif
diff --git a/src/bsp/lk/arch/arm64/system-onesegment.ld b/src/bsp/lk/arch/arm64/system-onesegment.ld
new file mode 100644
index 0000000..b750b10
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/system-onesegment.ld
@@ -0,0 +1,123 @@
+OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
+OUTPUT_ARCH(aarch64)
+
+ENTRY(_start)
+SECTIONS
+{
+    . = %KERNEL_BASE% + %KERNEL_LOAD_OFFSET%;
+
+    /* text/read-only data */
+    /* set the load address to physical MEMBASE */
+    .text : AT(%MEMBASE% + %KERNEL_LOAD_OFFSET%) {
+        __code_start = .;
+        KEEP(*(.text.boot))
+        KEEP(*(.text.boot.vectab))
+        *(.text* .sram.text.glue_7* .gnu.linkonce.t.*)
+    }
+
+    .interp : { *(.interp) }
+    .hash : { *(.hash) }
+    .dynsym : { *(.dynsym) }
+    .dynstr : { *(.dynstr) }
+    .rel.text : { *(.rel.text) *(.rel.gnu.linkonce.t*) }
+    .rela.text : { *(.rela.text) *(.rela.gnu.linkonce.t*) }
+    .rel.data : { *(.rel.data) *(.rel.gnu.linkonce.d*) }
+    .rela.data : { *(.rela.data) *(.rela.gnu.linkonce.d*) }
+    .rel.rodata : { *(.rel.rodata) *(.rel.gnu.linkonce.r*) }
+    .rela.rodata : { *(.rela.rodata) *(.rela.gnu.linkonce.r*) }
+    .rel.got : { *(.rel.got) }
+    .rela.got : { *(.rela.got) }
+    .rel.ctors : { *(.rel.ctors) }
+    .rela.ctors : { *(.rela.ctors) }
+    .rel.dtors : { *(.rel.dtors) }
+    .rela.dtors : { *(.rela.dtors) }
+    .rel.init : { *(.rel.init) }
+    .rela.init : { *(.rela.init) }
+    .rel.fini : { *(.rel.fini) }
+    .rela.fini : { *(.rela.fini) }
+    .rel.bss : { *(.rel.bss) }
+    .rela.bss : { *(.rela.bss) }
+    .rel.plt : { *(.rel.plt) }
+    .rela.plt : { *(.rela.plt) }
+    .init : { *(.init) } =0x9090
+    .plt : { *(.plt) }
+
+    /* .ARM.exidx is sorted, so has to go in its own output section.  */
+    __exidx_start = .;
+    .ARM.exidx : { *(.ARM.exidx* .gnu.linkonce.armexidx.*) }
+    __exidx_end = .;
+
+    .dummy_post_text : {
+	    __code_end = .;
+    }
+
+    .rodata : ALIGN(4096) {
+        __rodata_start = .;
+        __fault_handler_table_start = .;
+        KEEP(*(.rodata.fault_handler_table))
+        __fault_handler_table_end = .;
+        *(.rodata .rodata.* .gnu.linkonce.r.*)
+    }
+
+    /*
+     * extra linker scripts tend to insert sections just after .rodata,
+     * so we want to make sure this symbol comes after anything inserted above,
+     * but not aligned to the next section necessarily.
+     */
+    .dummy_post_rodata : {
+        __rodata_end = .;
+    }
+
+    .data : ALIGN(4096) {
+        /* writable data  */
+        __data_start_rom = .;
+        /* in one segment binaries, the rom data address is on top of the ram data address */
+        __data_start = .;
+        *(.data .data.* .gnu.linkonce.d.*)
+    }
+
+    .ctors : ALIGN(8) {
+        __ctor_list = .;
+        KEEP(*(.ctors .init_array))
+        __ctor_end = .;
+    }
+    .dtors : ALIGN(8) {
+        __dtor_list = .;
+        KEEP(*(.dtors .fini_array))
+        __dtor_end = .;
+    }
+    .got : { *(.got.plt) *(.got) }
+    .dynamic : { *(.dynamic) }
+
+    /*
+     * extra linker scripts tend to insert sections just after .data,
+     * so we want to make sure this symbol comes after anything inserted above,
+     * but not aligned to the next section necessarily.
+     */
+    .dummy_post_data : {
+        __data_end = .;
+    }
+
+    /* uninitialized data (in same segment as writable data) */
+    .bss : ALIGN(4096) {
+        __bss_start = .;
+        KEEP(*(.bss.prebss.*))
+        . = ALIGN(8);
+	__post_prebss_bss_start = .;
+        *(.bss .bss.*)
+        *(.gnu.linkonce.b.*)
+        *(COMMON)
+        . = ALIGN(8);
+        __bss_end = .;
+    }
+
+    /* Align the end to ensure anything after the kernel ends up on its own pages */
+    . = ALIGN(4096);
+    _end = .;
+
+    . = %KERNEL_BASE% + %MEMSIZE%;
+    _end_of_ram = .;
+
+    /* Strip unnecessary stuff */
+    /DISCARD/ : { *(.comment .note .eh_frame) }
+}
diff --git a/src/bsp/lk/arch/arm64/thread.c b/src/bsp/lk/arch/arm64/thread.c
new file mode 100644
index 0000000..56b0bf9
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/thread.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2008 Travis Geiselbrecht
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <sys/types.h>
+#include <string.h>
+#include <stdlib.h>
+#include <debug.h>
+#include <trace.h>
+#include <kernel/thread.h>
+#include <arch/arm64.h>
+
+#define LOCAL_TRACE 0
+
+struct context_switch_frame {
+    vaddr_t lr;
+    vaddr_t r18;
+    vaddr_t r19;
+    vaddr_t r20;
+    vaddr_t r21;
+    vaddr_t r22;
+    vaddr_t r23;
+    vaddr_t r24;
+    vaddr_t r25;
+    vaddr_t r26;
+    vaddr_t r27;
+    vaddr_t r28;
+    vaddr_t r29;
+    vaddr_t padding;
+};
+
+extern void arm64_context_switch(addr_t *old_sp, addr_t new_sp);
+
+static void initial_thread_func(void) __NO_RETURN;
+static void initial_thread_func(void)
+{
+    int ret;
+
+    thread_t *current_thread = get_current_thread();
+
+    LTRACEF("initial_thread_func: thread %p calling %p with arg %p\n", current_thread, current_thread->entry, current_thread->arg);
+
+    /* release the thread lock that was implicitly held across the reschedule */
+    spin_unlock(&thread_lock);
+    arch_enable_ints();
+
+    ret = current_thread->entry(current_thread->arg);
+
+    LTRACEF("initial_thread_func: thread %p exiting with %d\n", current_thread, ret);
+
+    thread_exit(ret);
+}
+
+void arch_thread_initialize(thread_t *t)
+{
+    // create a default stack frame on the stack
+    vaddr_t stack_top = (vaddr_t)t->stack + t->stack_size;
+
+    // make sure the top of the stack is 16 byte aligned for EABI compliance
+    stack_top = ROUNDDOWN(stack_top, 16);
+
+    struct context_switch_frame *frame = (struct context_switch_frame *)(stack_top);
+    frame--;
+
+    // fill it in
+    memset(frame, 0, sizeof(*frame));
+    frame->lr = (vaddr_t)&initial_thread_func;
+
+    // set the stack pointer
+    t->arch.sp = (vaddr_t)frame;
+}
+
+void arch_context_switch(thread_t *oldthread, thread_t *newthread)
+{
+    LTRACEF("old %p (%s), new %p (%s)\n", oldthread, oldthread->name, newthread, newthread->name);
+    arm64_fpu_pre_context_switch(oldthread);
+    arm64_context_switch(&oldthread->arch.sp, newthread->arch.sp);
+}
+
+void arch_dump_thread(thread_t *t)
+{
+    if (t->state != THREAD_RUNNING) {
+        dprintf(INFO, "\tarch: ");
+        dprintf(INFO, "sp 0x%lx\n", t->arch.sp);
+    }
+}
diff --git a/src/bsp/lk/arch/arm64/toolchain.mk b/src/bsp/lk/arch/arm64/toolchain.mk
new file mode 100644
index 0000000..88a17a1
--- /dev/null
+++ b/src/bsp/lk/arch/arm64/toolchain.mk
@@ -0,0 +1,34 @@
+ifndef ARCH_arm64_TOOLCHAIN_PREFIX
+ARCH_arm64_TOOLCHAIN_PREFIX := aarch64-elf-
+FOUNDTOOL=$(shell which $(ARCH_arm64_TOOLCHAIN_PREFIX)gcc)
+ifeq ($(FOUNDTOOL),)
+ARCH_arm64_TOOLCHAIN_PREFIX := aarch64-linux-android-
+FOUNDTOOL=$(shell which $(ARCH_arm64_TOOLCHAIN_PREFIX)gcc)
+ifeq ($(FOUNDTOOL),)
+$(error cannot find toolchain, please set ARCH_arm64_TOOLCHAIN_PREFIX or add it to your path)
+endif
+endif
+endif
+
+ifeq (false,$(call TOBOOL,$(ALLOW_FP_USE)))
+ARCH_arm64_COMPILEFLAGS := -mgeneral-regs-only -DWITH_NO_FP=1
+else
+ARCH_arm64_COMPILEFLAGS :=
+endif
+
+ifeq ($(call TOBOOL,$(CLANGBUILD)),true)
+
+CLANG_ARM64_TARGET_SYS ?= linux
+CLANG_ARM64_TARGET_ABI ?= gnu
+
+CLANG_ARM64_AS_DIR ?= $(shell dirname $(shell dirname $(ARCH_arm64_TOOLCHAIN_PREFIX)))
+
+ARM64_AS_PATH ?= $(wildcard $(CLANG_ARM64_AS_DIR)/*/bin/as)
+ifeq ($(ARM64_AS_PATH),)
+$(error Could not find $(CLANG_ARM64_AS_DIR)/*/bin/as, did the directory structure change?)
+endif
+
+ARCH_arm64_COMPILEFLAGS += -target aarch64-$(CLANG_ARM64_TARGET_SYS)-$(CLANG_ARM64_TARGET_ABI) \
+			   --gcc-toolchain=$(CLANG_ARM64_AS_DIR)/
+
+endif