root/arch/mips/include/asm/barrier.h


DEFINITIONS

This source file includes the following definitions:
  1. sync_ginv

/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2006 by Ralf Baechle (ralf@linux-mips.org)
 */
#ifndef __ASM_BARRIER_H
#define __ASM_BARRIER_H

#include <asm/addrspace.h>

/*
 * Sync types defined by the MIPS architecture (document MD00087 table 6.5).
 * These values are used with the SYNC instruction to perform memory barriers.
 * Two kinds of ordering guarantee are available through the SYNC instruction:
 * - Completion barriers
 * - Ordering barriers
 * Compared to a completion barrier, an ordering barrier is a lighter-weight
 * operation as it does not require the specified instructions before the SYNC
 * to be already completed. Instead it only requires that those specified
 * instructions which are subsequent to the SYNC in the instruction stream are
 * never re-ordered for processing ahead of the specified instructions which
 * are before the SYNC in the instruction stream. This potentially reduces how
 * many cycles the barrier instruction must stall before it completes.
 * Implementations that do not use any of the non-zero values of stype to
 * define different barriers, such as ordering barriers, must make those stype
 * values act the same as stype zero.
 */

/*
 * Completion barriers:
 * - Every synchronizable specified memory instruction (loads or stores or
 *   both) that occurs in the instruction stream before the SYNC instruction
 *   must be already globally performed before any synchronizable specified
 *   memory instructions that occur after the SYNC are allowed to be performed,
 *   with respect to any other processor or coherent I/O module.
 *
 * - The barrier does not guarantee the order in which instruction fetches are
 *   performed.
 *
 * - A stype value of zero will always be defined such that it performs the
 *   most complete set of synchronization operations that are defined. This
 *   means stype zero always does a completion barrier that affects both loads
 *   and stores preceding the SYNC instruction and both loads and stores that
 *   are subsequent to the SYNC instruction. Non-zero values of stype may be
 *   defined by the architecture or specific implementations to perform
 *   synchronization behaviors that are less complete than that of stype zero.
 *   If an implementation does not use one of these non-zero values to define
 *   a different synchronization behavior, then that non-zero value of stype
 *   must act the same as the stype zero completion barrier. This allows
 *   software written for an implementation with a lighter-weight barrier to
 *   work on another implementation which only implements the stype zero
 *   completion barrier.
 *
 * - A completion barrier is required, potentially in conjunction with SSNOP
 *   (in Release 1 of the Architecture) or EHB (in Release 2 of the
 *   Architecture), to guarantee that memory reference results are visible
 *   across operating mode changes. For example, a completion barrier is
 *   required on some implementations on entry to and exit from Debug Mode to
 *   guarantee that memory effects are handled correctly.
 */

/*
 * stype 0 - A completion barrier that affects preceding loads and stores and
 * subsequent loads and stores.
 * Older instructions which must reach the load/store ordering point before the
 * SYNC instruction completes: Loads, Stores
 * Younger instructions which must reach the load/store ordering point only
 * after the SYNC instruction completes: Loads, Stores
 * Older instructions which must be globally performed when the SYNC
 * instruction completes: Loads, Stores
 */
#define STYPE_SYNC 0x0

/*
 * Ordering barriers:
 * - Every synchronizable specified memory instruction (loads or stores or
 *   both) that occurs in the instruction stream before the SYNC instruction
 *   must reach a stage in the load/store datapath after which no instruction
 *   re-ordering is possible before any synchronizable specified memory
 *   instruction which occurs after the SYNC instruction in the instruction
 *   stream reaches the same stage in the load/store datapath.
 *
 * - If any memory instruction before the SYNC instruction in program order
 *   generates a memory request to external memory and any memory instruction
 *   after the SYNC instruction in program order also generates a memory
 *   request to external memory, the memory request belonging to the older
 *   instruction must be globally performed before the memory request
 *   belonging to the younger instruction is globally performed.
 *
 * - The barrier does not guarantee the order in which instruction fetches are
 *   performed.
 */

/*
 * stype 0x10 - An ordering barrier that affects preceding loads and stores
 * and subsequent loads and stores.
 * Older instructions which must reach the load/store ordering point before the
 * SYNC instruction completes: Loads, Stores
 * Younger instructions which must reach the load/store ordering point only
 * after the SYNC instruction completes: Loads, Stores
 * Older instructions which must be globally performed when the SYNC
 * instruction completes: N/A
 */
#define STYPE_SYNC_MB 0x10
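
/*
 * Illustrative sketch (hypothetical helper, not part of this header): an
 * stype value is passed to the SYNC instruction as an immediate operand,
 * exactly as sync_ginv() does with STYPE_GINV at the bottom of this file.
 * Assumes an assembler/ISA that accepts non-zero sync stypes.
 */
static inline void __example_sync_mb(void)
{
        asm volatile("sync\t%0" :: "i"(STYPE_SYNC_MB));
}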

/*
 * stype 0x14 - A completion barrier specific to global invalidations
 *
 * When a sync instruction of this type completes, any preceding GINVI or
 * GINVT operation has been globalized & completed on all coherent CPUs.
 * Anything that the GINV* instruction should invalidate will have been
 * invalidated on all coherent CPUs when this instruction completes. It is
 * implementation specific whether the GINV* instructions themselves will
 * ensure completion, or this sync type will.
 *
 * In systems implementing global invalidates (ie. with Config5.GI == 2 or 3)
 * this sync type also requires that previous SYNCI operations have completed.
 * See sync_ginv() below for the helper that emits this sync type.
 */
#define STYPE_GINV      0x14

#ifdef CONFIG_CPU_HAS_SYNC
#define __sync()                                \
        __asm__ __volatile__(                   \
                ".set   push\n\t"               \
                ".set   noreorder\n\t"          \
                ".set   mips2\n\t"              \
                "sync\n\t"                      \
                ".set   pop"                    \
                : /* no output */               \
                : /* no input */                \
                : "memory")
#else
#define __sync()        do { } while (0)
#endif

#define __fast_iob()                            \
        __asm__ __volatile__(                   \
                ".set   push\n\t"               \
                ".set   noreorder\n\t"          \
                "lw     $0,%0\n\t"              \
                "nop\n\t"                       \
                ".set   pop"                    \
                : /* no output */               \
                : "m" (*(int *)CKSEG1)          \
                : "memory")
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define OCTEON_SYNCW_STR       ".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n"
# define __syncw()      __asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory")

# define fast_wmb()     __syncw()
# define fast_rmb()     barrier()
# define fast_mb()      __sync()
# define fast_iob()     do { } while (0)
#else /* ! CONFIG_CPU_CAVIUM_OCTEON */
# define fast_wmb()     __sync()
# define fast_rmb()     __sync()
# define fast_mb()      __sync()
# ifdef CONFIG_SGI_IP28
#  define fast_iob()                            \
        __asm__ __volatile__(                   \
                ".set   push\n\t"               \
                ".set   noreorder\n\t"          \
                "lw     $0,%0\n\t"              \
                "sync\n\t"                      \
                "lw     $0,%0\n\t"              \
                ".set   pop"                    \
                : /* no output */               \
                : "m" (*(int *)CKSEG1ADDR(0x1fa00004)) \
                : "memory")
# else
#  define fast_iob()                            \
        do {                                    \
                __sync();                       \
                __fast_iob();                   \
        } while (0)
# endif
#endif /* CONFIG_CPU_CAVIUM_OCTEON */

#ifdef CONFIG_CPU_HAS_WB

#include <asm/wbflush.h>

#define mb()            wbflush()
#define iob()           wbflush()

#else /* !CONFIG_CPU_HAS_WB */

#define mb()            fast_mb()
#define iob()           fast_iob()

#endif /* !CONFIG_CPU_HAS_WB */

#define wmb()           fast_wmb()
#define rmb()           fast_rmb()
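
/*
 * Illustrative sketch (hypothetical device registers, not part of this
 * header): wmb() orders the two uncached stores with respect to each
 * other, and iob() ensures the writes have drained from the CPU's write
 * buffers before we return.
 */
static inline void __example_device_kick(volatile unsigned int *ctrl,
                                         volatile unsigned int *doorbell)
{
        *ctrl = 1;      /* program the device... */
        wmb();          /* ...order it before the doorbell write */
        *doorbell = 1;  /* tell the device to start */
        iob();          /* drain write buffers before returning */
}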

#if defined(CONFIG_WEAK_ORDERING)
# ifdef CONFIG_CPU_CAVIUM_OCTEON
#  define __smp_mb()    __sync()
#  define __smp_rmb()   barrier()
#  define __smp_wmb()   __syncw()
# else
#  define __smp_mb()    __asm__ __volatile__("sync" : : : "memory")
#  define __smp_rmb()   __asm__ __volatile__("sync" : : : "memory")
#  define __smp_wmb()   __asm__ __volatile__("sync" : : : "memory")
# endif
#else
#define __smp_mb()      barrier()
#define __smp_rmb()     barrier()
#define __smp_wmb()     barrier()
#endif
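
/*
 * Illustrative sketch (hypothetical functions, not part of this header):
 * the classic message-passing pattern that the SMP barriers above exist
 * to order. Real kernel code would pair these with READ_ONCE() and
 * WRITE_ONCE().
 */
static inline void __example_publish(int *data, volatile int *flag)
{
        *data = 42;     /* write the payload... */
        __smp_wmb();    /* ...and order it before the flag store */
        *flag = 1;      /* publish: consumers may now read the payload */
}

static inline int __example_consume(int *data, volatile int *flag)
{
        if (!*flag)     /* payload not yet published */
                return -1;
        __smp_rmb();    /* order the flag load before the payload load */
        return *data;   /* guaranteed to observe 42 */
}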

/*
 * When LL/SC does imply order, it must also be a compiler barrier to prevent
 * the compiler from reordering where the CPU will not. When it does not imply
 * order, the compiler is also free to reorder across the LL/SC loop and
 * ordering will be done by smp_llsc_mb() and friends.
 */
#if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
#define __WEAK_LLSC_MB          "       sync    \n"
#define smp_llsc_mb()           __asm__ __volatile__(__WEAK_LLSC_MB : : : "memory")
#define __LLSC_CLOBBER
#else
#define __WEAK_LLSC_MB          "               \n"
#define smp_llsc_mb()           do { } while (0)
#define __LLSC_CLOBBER          "memory"
#endif

#ifdef CONFIG_CPU_CAVIUM_OCTEON
#define smp_mb__before_llsc() smp_wmb()
#define __smp_mb__before_llsc() __smp_wmb()
/* Cause previous writes to become visible on all CPUs as soon as possible */
#define nudge_writes() __asm__ __volatile__(".set push\n\t"             \
                                            ".set arch=octeon\n\t"      \
                                            "syncw\n\t"                 \
                                            ".set pop" : : : "memory")
#else
#define smp_mb__before_llsc() smp_llsc_mb()
#define __smp_mb__before_llsc() smp_llsc_mb()
#define nudge_writes() mb()
#endif

#define __smp_mb__before_atomic()       __smp_mb__before_llsc()
#define __smp_mb__after_atomic()        smp_llsc_mb()
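
/*
 * Illustrative sketch (hypothetical helper, pre-R6 assembly) showing how
 * smp_mb__before_llsc(), __LLSC_CLOBBER and smp_llsc_mb() fit around a
 * bare LL/SC loop. Real kernel code uses the helpers in asm/atomic.h and
 * asm/cmpxchg.h, which also handle R6 and memory-constraint details.
 */
static inline void __example_llsc_inc(int *p)
{
        int tmp;

        smp_mb__before_llsc();  /* order prior accesses before the LL */
        __asm__ __volatile__(
        "1:     ll      %0, %1          # load-linked           \n"
        "       addiu   %0, %0, 1       # increment             \n"
        "       sc      %0, %1          # store-conditional     \n"
        "       beqz    %0, 1b          # retry if SC failed    \n"
        : "=&r" (tmp), "+R" (*p)
        : /* no inputs */
        : __LLSC_CLOBBER);
        smp_llsc_mb();          /* order the SC before later accesses */
}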

/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or prefetch) in between an LL & SC can cause the SC instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the LL in program order may actually
 *    be executed after the LL - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (ie. a SYNC
 *    instruction) prior to every LL instruction, in between it and any earlier
 *    memory access instructions.
 *
 *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
 *
 * 2) If a conditional branch exists between an LL & SC with a target outside
 *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the LL-SC loop.
 *
 *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
 *    at each affected branch target, for which we also use loongson_llsc_mb()
 *    defined below; see the sketch after this #ifdef block.
 *
 *    This case affects all current Loongson 3 CPUs.
 *
 * Both cases cause an error in the cache coherence protocol: the Invalidate
 * of a competing LL-SC goes 'missing', so the SC erroneously observes that
 * its core still holds the line in Exclusive state and lets the SC proceed.
 *
 * Therefore the error only occurs on SMP systems.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
#define loongson_llsc_mb()      __asm__ __volatile__("sync" : : : "memory")
#else
#define loongson_llsc_mb()      do { } while (0)
#endif
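
/*
 * Illustrative sketch (hypothetical helper, pre-R6 assembly) of where
 * loongson_llsc_mb() goes: before the LL for the reordering case (1),
 * and at the out-of-loop branch target for the speculation case (2),
 * similar to the placement in the kernel's cmpxchg()/atomic helpers.
 */
static inline int __example_loongson_cmpxchg(int *p, int old, int new)
{
        int prev, tmp;

        loongson_llsc_mb();     /* case 1: barrier between prior accesses & LL */
        __asm__ __volatile__(
        "1:     ll      %0, %2          # load current value    \n"
        "       bne     %0, %3, 2f      # mismatch: exit loop   \n"
        "       move    %1, %4          # copy in the new value \n"
        "       sc      %1, %2          # store-conditional     \n"
        "       beqz    %1, 1b          # retry if SC failed    \n"
        "2:                                                     \n"
        : "=&r" (prev), "=&r" (tmp), "+R" (*p)
        : "r" (old), "r" (new)
        : __LLSC_CLOBBER);
        loongson_llsc_mb();     /* case 2: barrier at the branch target */
        return prev;
}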

/*
 * Emit a completion barrier for any outstanding GINVI/GINVT operations,
 * using the STYPE_GINV sync type described above.
 */
static inline void sync_ginv(void)
{
        asm volatile("sync\t%0" :: "i"(STYPE_GINV));
}

#include <asm-generic/barrier.h>

#endif /* __ASM_BARRIER_H */
