ASR_BASE

Change-Id: Icf3719cc0afe3eeb3edc7fa80a2eb5199ca9dda1
diff --git a/marvell/linux/arch/mips/include/asm/barrier.h b/marvell/linux/arch/mips/include/asm/barrier.h
new file mode 100644
index 0000000..9228f73
--- /dev/null
+++ b/marvell/linux/arch/mips/include/asm/barrier.h
@@ -0,0 +1,294 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006 by Ralf Baechle (ralf@linux-mips.org)
+ */
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+#include <asm/addrspace.h>
+
+/*
+ * Sync types defined by the MIPS architecture (document MD00087 table 6.5)
+ * These values are used with the sync instruction to perform memory barriers.
+ * Types of ordering guarantees available through the SYNC instruction:
+ * - Completion Barriers
+ * - Ordering Barriers
+ * As compared to the completion barrier, the ordering barrier is a
+ * lighter-weight operation as it does not require the specified instructions
+ * before the SYNC to be already completed. Instead it only requires that those
+ * specified instructions which are subsequent to the SYNC in the instruction
+ * stream are never re-ordered for processing ahead of the specified
+ * instructions which are before the SYNC in the instruction stream.
+ * This potentially reduces how many cycles the barrier instruction must stall
+ * before it completes.
+ * Implementations that do not use any of the non-zero values of stype to define
+ * different barriers, such as ordering barriers, must make those stype values
+ * act the same as stype zero.
+ */
+
+/*
+ * Completion barriers:
+ * - Every synchronizable specified memory instruction (loads or stores or both)
+ *   that occurs in the instruction stream before the SYNC instruction must be
+ *   already globally performed before any synchronizable specified memory
+ *   instructions that occur after the SYNC are allowed to be performed, with
+ *   respect to any other processor or coherent I/O module.
+ *
+ * - The barrier does not guarantee the order in which instruction fetches are
+ *   performed.
+ *
+ * - A stype value of zero will always be defined such that it performs the most
+ *   complete set of synchronization operations that are defined.This means
+ *   stype zero always does a completion barrier that affects both loads and
+ *   stores preceding the SYNC instruction and both loads and stores that are
+ *   subsequent to the SYNC instruction. Non-zero values of stype may be defined
+ *   by the architecture or specific implementations to perform synchronization
+ *   behaviors that are less complete than that of stype zero. If an
+ *   implementation does not use one of these non-zero values to define a
+ *   different synchronization behavior, then that non-zero value of stype must
+ *   act the same as stype zero completion barrier. This allows software written
+ *   for an implementation with a lighter-weight barrier to work on another
+ *   implementation which only implements the stype zero completion barrier.
+ *
+ * - A completion barrier is required, potentially in conjunction with SSNOP (in
+ *   Release 1 of the Architecture) or EHB (in Release 2 of the Architecture),
+ *   to guarantee that memory reference results are visible across operating
+ *   mode changes. For example, a completion barrier is required on some
+ *   implementations on entry to and exit from Debug Mode to guarantee that
+ *   memory effects are handled correctly.
+ */
+
+/*
+ * stype 0 - A completion barrier that affects preceding loads and stores and
+ * subsequent loads and stores.
+ * Older instructions which must reach the load/store ordering point before the
+ * SYNC instruction completes: Loads, Stores
+ * Younger instructions which must reach the load/store ordering point only
+ * after the SYNC instruction completes: Loads, Stores
+ * Older instructions which must be globally performed when the SYNC instruction
+ * completes: Loads, Stores
+ */
+#define STYPE_SYNC 0x0
+
+/*
+ * Ordering barriers:
+ * - Every synchronizable specified memory instruction (loads or stores or both)
+ *   that occurs in the instruction stream before the SYNC instruction must
+ *   reach a stage in the load/store datapath after which no instruction
+ *   re-ordering is possible before any synchronizable specified memory
+ *   instruction which occurs after the SYNC instruction in the instruction
+ *   stream reaches the same stage in the load/store datapath.
+ *
+ * - If any memory instruction before the SYNC instruction in program order,
+ *   generates a memory request to the external memory and any memory
+ *   instruction after the SYNC instruction in program order also generates a
+ *   memory request to external memory, the memory request belonging to the
+ *   older instruction must be globally performed before the time the memory
+ *   request belonging to the younger instruction is globally performed.
+ *
+ * - The barrier does not guarantee the order in which instruction fetches are
+ *   performed.
+ */
+
+/*
+ * stype 0x10 - An ordering barrier that affects preceding loads and stores and
+ * subsequent loads and stores.
+ * Older instructions which must reach the load/store ordering point before the
+ * SYNC instruction completes: Loads, Stores
+ * Younger instructions which must reach the load/store ordering point only
+ * after the SYNC instruction completes: Loads, Stores
+ * Older instructions which must be globally performed when the SYNC instruction
+ * completes: N/A
+ */
+#define STYPE_SYNC_MB 0x10
+
+/*
+ * stype 0x14 - A completion barrier specific to global invalidations
+ *
+ * When a sync instruction of this type completes any preceding GINVI or GINVT
+ * operation has been globalized & completed on all coherent CPUs. Anything
+ * that the GINV* instruction should invalidate will have been invalidated on
+ * all coherent CPUs when this instruction completes. It is implementation
+ * specific whether the GINV* instructions themselves will ensure completion,
+ * or this sync type will.
+ *
+ * In systems implementing global invalidates (ie. with Config5.GI == 2 or 3)
+ * this sync type also requires that previous SYNCI operations have completed.
+ */
+#define STYPE_GINV	0x14
+
+#ifdef CONFIG_CPU_HAS_SYNC
+#define __sync()				\
+	__asm__ __volatile__(			\
+		".set	push\n\t"		\
+		".set	noreorder\n\t"		\
+		".set	mips2\n\t"		\
+		"sync\n\t"			\
+		".set	pop"			\
+		: /* no output */		\
+		: /* no input */		\
+		: "memory")
+#else
+#define __sync()	do { } while(0)
+#endif
+
+#define __fast_iob()				\
+	__asm__ __volatile__(			\
+		".set	push\n\t"		\
+		".set	noreorder\n\t"		\
+		"lw	$0,%0\n\t"		\
+		"nop\n\t"			\
+		".set	pop"			\
+		: /* no output */		\
+		: "m" (*(int *)CKSEG1)		\
+		: "memory")
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+# define OCTEON_SYNCW_STR	".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n"
+# define __syncw()	__asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory")
+
+# define fast_wmb()	__syncw()
+# define fast_rmb()	barrier()
+# define fast_mb()	__sync()
+# define fast_iob()	do { } while (0)
+#else /* ! CONFIG_CPU_CAVIUM_OCTEON */
+# define fast_wmb()	__sync()
+# define fast_rmb()	__sync()
+# define fast_mb()	__sync()
+# ifdef CONFIG_SGI_IP28
+#  define fast_iob()				\
+	__asm__ __volatile__(			\
+		".set	push\n\t"		\
+		".set	noreorder\n\t"		\
+		"lw	$0,%0\n\t"		\
+		"sync\n\t"			\
+		"lw	$0,%0\n\t"		\
+		".set	pop"			\
+		: /* no output */		\
+		: "m" (*(int *)CKSEG1ADDR(0x1fa00004)) \
+		: "memory")
+# else
+#  define fast_iob()				\
+	do {					\
+		__sync();			\
+		__fast_iob();			\
+	} while (0)
+# endif
+#endif /* CONFIG_CPU_CAVIUM_OCTEON */
+
+#ifdef CONFIG_CPU_HAS_WB
+
+#include <asm/wbflush.h>
+
+#define mb()		wbflush()
+#define iob()		wbflush()
+
+#else /* !CONFIG_CPU_HAS_WB */
+
+#define mb()		fast_mb()
+#define iob()		fast_iob()
+
+#endif /* !CONFIG_CPU_HAS_WB */
+
+#define wmb()		fast_wmb()
+#define rmb()		fast_rmb()
+
+#if defined(CONFIG_WEAK_ORDERING)
+# ifdef CONFIG_CPU_CAVIUM_OCTEON
+#  define __smp_mb()	__sync()
+#  define __smp_rmb()	barrier()
+#  define __smp_wmb()	__syncw()
+# else
+#  define __smp_mb()	__asm__ __volatile__("sync" : : :"memory")
+#  define __smp_rmb()	__asm__ __volatile__("sync" : : :"memory")
+#  define __smp_wmb()	__asm__ __volatile__("sync" : : :"memory")
+# endif
+#else
+#define __smp_mb()	barrier()
+#define __smp_rmb()	barrier()
+#define __smp_wmb()	barrier()
+#endif
+
+/*
+ * When LL/SC does imply order, it must also be a compiler barrier to avoid the
+ * compiler from reordering where the CPU will not. When it does not imply
+ * order, the compiler is also free to reorder across the LL/SC loop and
+ * ordering will be done by smp_llsc_mb() and friends.
+ */
+#if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
+#define __WEAK_LLSC_MB		"	sync	\n"
+#define smp_llsc_mb()		__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
+#define __LLSC_CLOBBER
+#else
+#define __WEAK_LLSC_MB		"		\n"
+#define smp_llsc_mb()		do { } while (0)
+#define __LLSC_CLOBBER		"memory"
+#endif
+
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+#define smp_mb__before_llsc() smp_wmb()
+#define __smp_mb__before_llsc() __smp_wmb()
+/* Cause previous writes to become visible on all CPUs as soon as possible */
+#define nudge_writes() __asm__ __volatile__(".set push\n\t"		\
+					    ".set arch=octeon\n\t"	\
+					    "syncw\n\t"			\
+					    ".set pop" : : : "memory")
+#else
+#define smp_mb__before_llsc() smp_llsc_mb()
+#define __smp_mb__before_llsc() smp_llsc_mb()
+#define nudge_writes() mb()
+#endif
+
+#define __smp_mb__before_atomic()	__smp_mb__before_llsc()
+#define __smp_mb__after_atomic()	smp_llsc_mb()
+
+/*
+ * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
+ * store or prefetch) in between an LL & SC can cause the SC instruction to
+ * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
+ * containing such sequences, this bug bites harder than we might otherwise
+ * expect due to reordering & speculation:
+ *
+ * 1) A memory access appearing prior to the LL in program order may actually
+ *    be executed after the LL - this is the reordering case.
+ *
+ *    In order to avoid this we need to place a memory barrier (ie. a SYNC
+ *    instruction) prior to every LL instruction, in between it and any earlier
+ *    memory access instructions.
+ *
+ *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
+ *
+ * 2) If a conditional branch exists between an LL & SC with a target outside
+ *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
+ *    or similar, then misprediction of the branch may allow speculative
+ *    execution of memory accesses from outside of the LL-SC loop.
+ *
+ *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
+ *    at each affected branch target, for which we also use loongson_llsc_mb()
+ *    defined below.
+ *
+ *    This case affects all current Loongson 3 CPUs.
+ *
+ * The above described cases cause an error in the cache coherence protocol;
+ * such that the Invalidate of a competing LL-SC goes 'missing' and SC
+ * erroneously observes its core still has Exclusive state and lets the SC
+ * proceed.
+ *
+ * Therefore the error only occurs on SMP systems.
+ */
+#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
+#define loongson_llsc_mb()	__asm__ __volatile__("sync" : : :"memory")
+#else
+#define loongson_llsc_mb()	do { } while (0)
+#endif
+
+static inline void sync_ginv(void)
+{
+	asm volatile("sync\t%0" :: "i"(STYPE_GINV));
+}
+
+#include <asm-generic/barrier.h>
+
+#endif /* __ASM_BARRIER_H */