| OLD | NEW |
| (Empty) | |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 // This file is an internal atomic implementation, use base/atomicops.h instead. |
| 6 // |
| 7 // LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears. |
| 8 |
| 9 #ifndef BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_ |
| 10 #define BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_ |
| 11 |
| 12 namespace base { |
| 13 namespace subtle { |
| 14 |
| 15 // Memory barriers on ARM are funky, but the kernel is here to help: |
| 16 // |
| 17 // * ARMv5 didn't support SMP, there is no memory barrier instruction at |
| 18 // all on this architecture, or when targeting its machine code. |
| 19 // |
| 20 // * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by |
| 21 // writing a random value to a very specific coprocessor register. |
| 22 // |
| 23 // * On ARMv7, the "dmb" instruction is used to perform a full memory |
| 24 // barrier (though writing to the co-processor will still work). |
| 25 // However, on single core devices (e.g. Nexus One, or Nexus S), |
| 26 // this instruction will take up to 200 ns, which is huge, even though |
| 27 // it's completely un-needed on these devices. |
| 28 // |
| 29 // * There is no easy way to determine at runtime if the device is |
| 30 // single or multi-core. However, the kernel provides a useful helper |
| 31 // function at a fixed memory address (0xffff0fa0), which will always |
| 32 // perform a memory barrier in the most efficient way. I.e. on single |
| 33 // core devices, this is an empty function that exits immediately. |
| 34 // On multi-core devices, it implements a full memory barrier. |
| 35 // |
| 36 // * This source could be compiled to ARMv5 machine code that runs on a |
| 37 // multi-core ARMv6 or ARMv7 device. In this case, memory barriers |
| 38 // are needed for correct execution. Always call the kernel helper, even |
| 39 // when targeting ARMv5TE. |
| 40 // |
| 41 |
// Performs a memory barrier by calling the helper routine that the kernel
// exposes at the fixed address 0xffff0fa0.
//
// Because this is an out-of-line call through a function pointer, it also
// acts as an implicit compiler barrier.
inline void MemoryBarrier() {
  typedef void (*KernelBarrierHelper)();
  const KernelBarrierHelper kernel_barrier =
      reinterpret_cast<KernelBarrierHelper>(0xffff0fa0);
  kernel_barrier();
}
| 48 |
| 49 // An ARM toolchain would only define one of these depending on which |
| 50 // variant of the target architecture is being used. This tests against |
| 51 // any known ARMv6 or ARMv7 variant, where it is possible to directly |
| 52 // use ldrex/strex instructions to implement fast atomic operations. |
| 53 #if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ |
| 54 defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \ |
| 55 defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ |
| 56 defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ |
| 57 defined(__ARM_ARCH_6KZ__) || defined(__ARM_ARCH_6T2__) |
| 58 |
| 59 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, |
| 60 Atomic32 old_value, |
| 61 Atomic32 new_value) { |
| 62 Atomic32 prev_value; |
| 63 int reloop; |
| 64 do { |
| 65 // The following is equivalent to: |
| 66 // |
| 67 // prev_value = LDREX(ptr) |
| 68 // reloop = 0 |
| 69 // if (prev_value != old_value) |
| 70 // reloop = STREX(ptr, new_value) |
| 71 __asm__ __volatile__(" ldrex %0, [%3]\n" |
| 72 " mov %1, #0\n" |
| 73 " teq %0, %4\n" |
| 74 #ifdef __thumb2__ |
| 75 " it eq\n" |
| 76 #endif |
| 77 " strexeq %1, %5, [%3]\n" |
| 78 : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr) |
| 79 : "r"(ptr), "r"(old_value), "r"(new_value) |
| 80 : "cc", "memory"); |
| 81 } while (reloop != 0); |
| 82 return prev_value; |
| 83 } |
| 84 |
| 85 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, |
| 86 Atomic32 old_value, |
| 87 Atomic32 new_value) { |
| 88 Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value); |
| 89 MemoryBarrier(); |
| 90 return result; |
| 91 } |
| 92 |
| 93 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, |
| 94 Atomic32 old_value, |
| 95 Atomic32 new_value) { |
| 96 MemoryBarrier(); |
| 97 return NoBarrier_CompareAndSwap(ptr, old_value, new_value); |
| 98 } |
| 99 |
| 100 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, |
| 101 Atomic32 increment) { |
| 102 Atomic32 value; |
| 103 int reloop; |
| 104 do { |
| 105 // Equivalent to: |
| 106 // |
| 107 // value = LDREX(ptr) |
| 108 // value += increment |
| 109 // reloop = STREX(ptr, value) |
| 110 // |
| 111 __asm__ __volatile__(" ldrex %0, [%3]\n" |
| 112 " add %0, %0, %4\n" |
| 113 " strex %1, %0, [%3]\n" |
| 114 : "=&r"(value), "=&r"(reloop), "+m"(*ptr) |
| 115 : "r"(ptr), "r"(increment) |
| 116 : "cc", "memory"); |
| 117 } while (reloop); |
| 118 return value; |
| 119 } |
| 120 |
| 121 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, |
| 122 Atomic32 increment) { |
| 123 // TODO(digit): Investigate if it's possible to implement this with |
| 124 // a single MemoryBarrier() operation between the LDREX and STREX. |
| 125 // See http://crbug.com/246514 |
| 126 MemoryBarrier(); |
| 127 Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment); |
| 128 MemoryBarrier(); |
| 129 return result; |
| 130 } |
| 131 |
| 132 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, |
| 133 Atomic32 new_value) { |
| 134 Atomic32 old_value; |
| 135 int reloop; |
| 136 do { |
| 137 // old_value = LDREX(ptr) |
| 138 // fail = STREX(ptr, new_value) |
| 139 __asm__ __volatile__(" ldrex %0, [%3]\n" |
| 140 " strex %1, %4, [%3]\n" |
| 141 : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr) |
| 142 : "r"(ptr), "r"(new_value) |
| 143 : "cc", "memory"); |
| 144 } while (reloop != 0); |
| 145 return old_value; |
| 146 } |
| 147 |
| 148 // This tests against any known ARMv5 variant. |
| 149 #elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \ |
| 150 defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__) |
| 151 |
| 152 // The kernel also provides a helper function to perform an atomic |
| 153 // compare-and-swap operation at the hard-wired address 0xffff0fc0. |
| 154 // On ARMv5, this is implemented by a special code path that the kernel |
| 155 // detects and treats specially when thread pre-emption happens. |
| 156 // On ARMv6 and higher, it uses LDREX/STREX instructions instead. |
| 157 // |
| 158 // Note that this always performs a full memory barrier, so there is no |
| 159 // need to add calls to MemoryBarrier() before or after it. It also |
| 160 // returns 0 on success, and a non-zero value on failure. |
| 161 // |
| 162 // Available and reliable since Linux 2.6.24. Both Android and ChromeOS |
| 163 // use newer kernel revisions, so this should not be a concern. |
| 164 namespace { |
| 165 |
| 166 inline int LinuxKernelCmpxchg(Atomic32 old_value, |
| 167 Atomic32 new_value, |
| 168 volatile Atomic32* ptr) { |
| 169 typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*); |
| 170 return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr); |
| 171 } |
| 172 |
| 173 } // namespace |
| 174 |
| 175 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, |
| 176 Atomic32 old_value, |
| 177 Atomic32 new_value) { |
| 178 Atomic32 prev_value; |
| 179 for (;;) { |
| 180 prev_value = *ptr; |
| 181 if (prev_value != old_value) |
| 182 return prev_value; |
| 183 if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) |
| 184 return old_value; |
| 185 } |
| 186 } |
| 187 |
| 188 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, |
| 189 Atomic32 new_value) { |
| 190 Atomic32 old_value; |
| 191 do { |
| 192 old_value = *ptr; |
| 193 } while (LinuxKernelCmpxchg(old_value, new_value, ptr)); |
| 194 return old_value; |
| 195 } |
| 196 |
| 197 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, |
| 198 Atomic32 increment) { |
| 199 return Barrier_AtomicIncrement(ptr, increment); |
| 200 } |
| 201 |
| 202 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, |
| 203 Atomic32 increment) { |
| 204 for (;;) { |
| 205 // Atomic exchange the old value with an incremented one. |
| 206 Atomic32 old_value = *ptr; |
| 207 Atomic32 new_value = old_value + increment; |
| 208 if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) { |
| 209 // The exchange took place as expected. |
| 210 return new_value; |
| 211 } |
| 212 // Otherwise, *ptr changed mid-loop and we need to retry. |
| 213 } |
| 214 } |
| 215 |
| 216 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, |
| 217 Atomic32 old_value, |
| 218 Atomic32 new_value) { |
| 219 Atomic32 prev_value; |
| 220 for (;;) { |
| 221 prev_value = *ptr; |
| 222 if (prev_value != old_value) { |
| 223 // Always ensure acquire semantics. |
| 224 MemoryBarrier(); |
| 225 return prev_value; |
| 226 } |
| 227 if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) |
| 228 return old_value; |
| 229 } |
| 230 } |
| 231 |
| 232 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, |
| 233 Atomic32 old_value, |
| 234 Atomic32 new_value) { |
| 235 // Use NoBarrier_CompareAndSwap(), because its implementation |
| 236 // ensures that all stores happen through the kernel helper |
| 237 // which always implement a full barrier. |
| 238 return NoBarrier_CompareAndSwap(ptr, old_value, new_value); |
| 239 } |
| 240 |
| 241 #else |
| 242 # error "Your CPU's ARM architecture is not supported yet" |
| 243 #endif |
| 244 |
| 245 // NOTE: Atomicity of the following load and store operations is only |
| 246 // guaranteed in case of 32-bit alignment of |ptr| values. |
| 247 |
| 248 inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { |
| 249 *ptr = value; |
| 250 } |
| 251 |
| 252 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { |
| 253 *ptr = value; |
| 254 MemoryBarrier(); |
| 255 } |
| 256 |
| 257 inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { |
| 258 MemoryBarrier(); |
| 259 *ptr = value; |
| 260 } |
| 261 |
| 262 inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; } |
| 263 |
| 264 inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { |
| 265 Atomic32 value = *ptr; |
| 266 MemoryBarrier(); |
| 267 return value; |
| 268 } |
| 269 |
| 270 inline Atomic32 Release_Load(volatile const Atomic32* ptr) { |
| 271 MemoryBarrier(); |
| 272 return *ptr; |
| 273 } |
| 274 |
| 275 } // namespace base::subtle |
| 276 } // namespace base |
| 277 |
| 278 #endif // BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_ |
| OLD | NEW |