// Copyright 2010 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation; use atomicops.h instead.
//
// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.

#ifndef V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
#define V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_

#if defined(__QNXNTO__)
#include <sys/cpuinline.h>
#endif

namespace v8 {
namespace base {

// Memory barriers on ARM are funky, but the kernel is here to help:
//
// * ARMv5 didn't support SMP; there is no memory barrier instruction at
//   all on this architecture, or when targeting its machine code.
//
// * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by
//   writing a random value to a very specific coprocessor register.
//
// * On ARMv7, the "dmb" instruction is used to perform a full memory
//   barrier (though writing to the co-processor will still work).
//   However, on single core devices (e.g. Nexus One, or Nexus S),
//   this instruction will take up to 200 ns, which is huge, even though
//   it's completely un-needed on these devices.
//
// * There is no easy way to determine at runtime if the device is
//   single or multi-core. However, the kernel provides a useful helper
//   function at a fixed memory address (0xffff0fa0), which will always
//   perform a memory barrier in the most efficient way. I.e. on single
//   core devices, this is an empty function that exits immediately.
//   On multi-core devices, it implements a full memory barrier.
//
// * This source could be compiled to ARMv5 machine code that runs on a
//   multi-core ARMv6 or ARMv7 device. In this case, memory barriers
//   are needed for correct execution. Always call the kernel helper, even
//   when targeting ARMv5TE.
//

inline void MemoryBarrier() {
#if defined(__ANDROID__)
  // Note: This is a function call, which is also an implicit compiler barrier.
  typedef void (*KernelMemoryBarrierFunc)();
  ((KernelMemoryBarrierFunc)0xffff0fa0)();
#elif defined(__QNXNTO__)
  __cpu_membarrier();
#else
  // Fallback to GCC built-in function
  __sync_synchronize();
#endif
}
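
// For reference only: a sketch (not used by this file) of the raw barrier
// sequences the Android kernel helper above ultimately relies on. On ARMv6
// the full barrier is a CP15 write, on ARMv7 and newer it is the "dmb"
// instruction. The exact choice is made by the kernel at runtime, which is
// why the code above calls the helper instead of emitting these directly.
//
//   // ARMv6: Data Memory Barrier via a write to the c7/c10/5 CP15 register.
//   __asm__ __volatile__("mcr p15, 0, %0, c7, c10, 5" : : "r"(0) : "memory");
//   // ARMv7+: dedicated barrier instruction, inner-shareable domain.
//   __asm__ __volatile__("dmb ish" : : : "memory");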

// An ARM toolchain would only define one of these depending on which
// variant of the target architecture is being used. This tests against
// any known ARMv6, ARMv7 or ARMv8 variant, where it is possible to
// directly use ldrex/strex instructions to implement fast atomic
// operations.
#if defined(__ARM_ARCH_8A__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) ||  \
    defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
    defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) ||  \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__)

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  int reloop;
  do {
    // The following is equivalent to:
    //
    //   prev_value = LDREX(ptr)
    //   reloop = 0
    //   if (prev_value != old_value)
    //      reloop = STREX(ptr, new_value)
    __asm__ __volatile__("    ldrex %0, [%3]\n"
                         "    mov %1, #0\n"
                         "    cmp %0, %4\n"
#ifdef __thumb2__
                         "    it eq\n"
#endif
                         "    strexeq %1, %5, [%3]\n"
                         : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(old_value), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return prev_value;
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  MemoryBarrier();
  return result;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  MemoryBarrier();
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 value;
  int reloop;
  do {
    // Equivalent to:
    //
    //  value = LDREX(ptr)
    //  value += increment
    //  reloop = STREX(ptr, value)
    //
    __asm__ __volatile__("    ldrex %0, [%3]\n"
                         "    add %0, %0, %4\n"
                         "    strex %1, %0, [%3]\n"
                         : "=&r"(value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(increment)
                         : "cc", "memory");
  } while (reloop);
  return value;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  // TODO(digit): Investigate if it's possible to implement this with
  // a single MemoryBarrier() operation between the LDREX and STREX.
  // See http://crbug.com/246514
  MemoryBarrier();
  Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
  MemoryBarrier();
  return result;
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  int reloop;
  do {
    // old_value = LDREX(ptr)
    // reloop = STREX(ptr, new_value)
    __asm__ __volatile__("   ldrex %0, [%3]\n"
                         "   strex %1, %4, [%3]\n"
                         : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return old_value;
}

// This tests against any known ARMv5 variant.
#elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
      defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)

// The kernel also provides a helper function to perform an atomic
// compare-and-swap operation at the hard-wired address 0xffff0fc0.
// On ARMv5, this is implemented by a special code path that the kernel
// detects and treats specially when thread pre-emption happens.
// On ARMv6 and higher, it uses LDREX/STREX instructions instead.
//
// Note that this always performs a full memory barrier; there is no
// need to add calls to MemoryBarrier() before or after it. It also
// returns 0 on success, and non-zero on failure.
//
// Available and reliable since Linux 2.6.24. Both Android and ChromeOS
// use newer kernel revisions, so this should not be a concern.
namespace {

inline int LinuxKernelCmpxchg(Atomic32 old_value,
                              Atomic32 new_value,
                              volatile Atomic32* ptr) {
  typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*);
  return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
}

}  // namespace

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value)
      return prev_value;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  do {
    old_value = *ptr;
  } while (LinuxKernelCmpxchg(old_value, new_value, ptr));
  return old_value;
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  for (;;) {
    // Atomic exchange the old value with an incremented one.
    Atomic32 old_value = *ptr;
    Atomic32 new_value = old_value + increment;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
      // The exchange took place as expected.
      return new_value;
    }
    // Otherwise, *ptr changed mid-loop and we need to retry.
  }
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value) {
      // Always ensure acquire semantics.
      MemoryBarrier();
      return prev_value;
    }
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  // This could be implemented as:
  //    MemoryBarrier();
  //    return NoBarrier_CompareAndSwap();
  //
  // But would use 3 barriers per successful CAS. To save performance,
  // use Acquire_CompareAndSwap(). Its implementation guarantees that:
  // - A successful swap uses only 2 barriers (in the kernel helper).
  // - An early return due to (prev_value != old_value) performs
  //   a memory barrier with no store, which is equivalent to the
  //   generic implementation above.
  return Acquire_CompareAndSwap(ptr, old_value, new_value);
}

#else
#  error "Your CPU's ARM architecture is not supported yet"
#endif
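
// Illustrative only: a minimal sketch (not part of the atomic ops interface)
// of how the primitives above are commonly combined into a thread-safe
// reference count. The Example_* names are hypothetical. The increment can be
// relaxed, while the decrement uses Barrier_AtomicIncrement() so that the
// thread dropping the last reference observes all prior writes to the
// protected object before tearing it down.
inline void Example_RefCountIncrement(volatile Atomic32* count) {
  NoBarrier_AtomicIncrement(count, 1);
}

// Returns true when the caller released the last reference.
inline bool Example_RefCountDecrement(volatile Atomic32* count) {
  return Barrier_AtomicIncrement(count, -1) == 0;
}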

// NOTE: Atomicity of the following load and store operations is only
// guaranteed in case of 32-bit alignment of |ptr| values.

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  MemoryBarrier();
  *ptr = value;
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;
  MemoryBarrier();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
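
// Illustrative only: a hypothetical sketch of the publish/consume pattern the
// accessors above are meant for. A producer fills in non-atomic data, then
// uses Release_Store() to publish a flag; a consumer that observes the flag
// via Acquire_Load() is guaranteed to also observe the data written before
// the store. The Example_* names are not part of the interface.
inline void Example_Publish(int* data, volatile Atomic32* ready, int payload) {
  *data = payload;          // Plain write to the payload.
  Release_Store(ready, 1);  // Barrier, then publish the flag.
}

// Returns true and fills |*out| only once the payload has been published.
inline bool Example_TryConsume(const int* data, volatile const Atomic32* ready,
                               int* out) {
  if (Acquire_Load(ready) == 0) return false;  // Flag load, then barrier.
  *out = *data;  // Safe: the barrier ordered this read after the flag load.
  return true;
}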

// Byte accessors.

inline void NoBarrier_Store(volatile Atomic8* ptr, Atomic8 value) {
  *ptr = value;
}

inline Atomic8 NoBarrier_Load(volatile const Atomic8* ptr) { return *ptr; }

}  // namespace base
}  // namespace v8

#endif  // V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_