Chromium Code Reviews
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // This file is an internal atomic implementation, use base/atomicops.h instead. | |
| 6 // | |
| 7 // LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears. | |
| 8 | |
| 9 #ifndef BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_ | |
| 10 #define BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_ | |
| 11 | |
| 12 namespace base { | |
| 13 namespace subtle { | |
| 14 | |
| 15 // Memory barriers on ARM are funky, but the kernel is here to help: | |
|
Mark Mentovai
2013/06/05 13:34:31
Thanks for this comment!
| |
| 16 // | |
| 17 // * ARMv5 didn't support SMP; there is no memory barrier instruction at | |
| 18 // all on this architecture, or when targeting its machine code. | |
|
Mark Mentovai
2013/06/05 13:34:31
targeting
digit1
2013/06/05 16:35:47
Done.
| |
| 19 // | |
| 20 // * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by | |
| 21 // writing a random value to a very specific coprocessor register. | |
| 22 // | |
| 23 // * On ARMv7, the "dmb" instruction is used to perform a full memory | |
| 24 // barrier (though writing to the co-processor will still work). | |
| 25 // However, on single-core devices (e.g. Nexus One or Nexus S), | |
| 26 // this instruction will take up to 200 ns, which is huge, even though | |
| 27 // it's completely unnecessary on these devices. | |
| 28 // | |
| 29 // * There is no easy way to determine at runtime if the device is | |
| 30 // single or multi-core. However, the kernel provides a useful helper | |
|
Mark Mentovai
2013/06/05 13:34:31
provides
digit1
2013/06/05 16:35:47
Done.
| |
| 31 // function at a fixed memory address (0xffff0fa0), which will always | |
| 32 // perform a memory barrier in the most efficient way. On single-core | |
| 33 // devices, this is an empty function that returns immediately. | |
| 34 // On multi-core devices, it implements a full memory barrier. | |
| 35 // | |
| 36 // Note that this helper function doesn't modify any register or memory. | |
| 37 // See the comment in Barrier_AtomicIncrement() for why this is | |
|
Mark Mentovai
2013/06/05 13:34:31
There are two versions of Barrier_AtomicIncrement(
digit1
2013/06/05 16:35:47
This comment is obsolete (it was only useful for a
| |
| 38 // important. | |
| 39 // | |
| 40 // * This source could be compiled to ARMv5 machine code that runs on a | |
| 41 // multi-core ARMv6 or ARMv7 device. In this case, memory barriers | |
| 42 // are needed for correct execution. Always call the kernel helper, even | |
| 43 // when targeting ARMv5TE. | |
| 44 // | |
| 45 | |
| 46 #define LINUX_ARM_KERNEL_MEMORY_BARRIER 0xffff0fa0 | |
|
Mark Mentovai
2013/06/05 13:34:31
You can avoid the macro entirely (and thus the nee
digit1
2013/06/05 16:35:47
Yes, again, this came from the previous patch were
| |
| 47 | |
| 48 inline void MemoryBarrier() { | |
|
Mark Mentovai
2013/06/05 13:34:31
Make this static or put it in an unnamed namespace
digit1
2013/06/05 16:35:47
Unfortunately, the function is declared in base/at
| |
| 49 // Note: This is a function call, which is also an implicit compiler | |
| 50 // barrier. | |
| 51 ((void (*)(void))LINUX_ARM_KERNEL_MEMORY_BARRIER)(); | |
|
Mark Mentovai
2013/06/05 13:34:31
This is C++, the second void is unnecessary, it ca
digit1
2013/06/05 16:35:47
Done.
| |
| 52 } | |
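
A hedged usage sketch (not part of this patch): MemoryBarrier() is a full fence, so it can order a data write before a flag write between threads. The g_payload/g_ready names below are invented for illustration and assume the declarations from base/atomicops.h.

#include "base/atomicops.h"

// Illustrative globals, not part of the reviewed file.
base::subtle::Atomic32 g_payload = 0;
base::subtle::Atomic32 g_ready = 0;

void Producer() {
  base::subtle::NoBarrier_Store(&g_payload, 42);
  base::subtle::MemoryBarrier();               // Publish the payload...
  base::subtle::NoBarrier_Store(&g_ready, 1);  // ...before setting the flag.
}

base::subtle::Atomic32 Consumer() {
  while (base::subtle::NoBarrier_Load(&g_ready) == 0) {}
  base::subtle::MemoryBarrier();  // Pairs with the producer's barrier.
  return base::subtle::NoBarrier_Load(&g_payload);  // Observes 42.
}
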
| 53 | |
| 54 #if defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_6__) | |
| 55 | |
| 56 // On ARMv6 and higher, it is possible to use the ldrex/strex | |
| 57 // instructions to implement fast atomic operations directly. | |
| 58 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, | |
| 59 Atomic32 old_value, | |
| 60 Atomic32 new_value) { | |
| 61 Atomic32 prev_value; | |
| 62 int reloop; | |
| 63 do { | |
| 64 // The following is equivalent to: | |
| 65 // | |
| 66 // prev_value = LDREX(ptr) | |
| 67 // reloop = 0 | |
| 68 // if (prev_value == old_value) | |
| 69 // reloop = STREX(ptr, new_value) | |
| 70 __asm__ __volatile__(" ldrex %0, [%3]\n" | |
| 71 " mov %1, #0\n" | |
| 72 " teq %0, %4\n" | |
| 73 #ifdef __thumb2__ | |
| 74 " it eq\n" | |
| 75 #endif | |
| 76 " strexeq %1, %5, [%3]\n" | |
| 77 : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr) | |
| 78 : "r"(ptr), "r"(old_value), "r"(new_value) | |
| 79 : "cc", "memory"); | |
| 80 } while (reloop != 0); | |
| 81 return prev_value; | |
| 82 } | |
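
For readers unfamiliar with compare-and-swap, a minimal usage sketch follows (kUnset, kClaimed, and g_state are hypothetical names, not part of this patch): the function returns the previous value, so a caller knows whether its swap took effect.

// Hypothetical one-shot claim flag, for illustration only.
const base::subtle::Atomic32 kUnset = 0;
const base::subtle::Atomic32 kClaimed = 1;
base::subtle::Atomic32 g_state = kUnset;

// Returns true only for the single thread that flips kUnset -> kClaimed.
bool TryClaim() {
  return base::subtle::NoBarrier_CompareAndSwap(&g_state, kUnset, kClaimed) ==
         kUnset;
}
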
| 83 | |
| 84 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, | |
| 85 Atomic32 old_value, | |
| 86 Atomic32 new_value) { | |
| 87 Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value); | |
| 88 MemoryBarrier(); | |
| 89 return result; | |
| 90 } | |
| 91 | |
| 92 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, | |
| 93 Atomic32 old_value, | |
| 94 Atomic32 new_value) { | |
| 95 MemoryBarrier(); | |
| 96 return NoBarrier_CompareAndSwap(ptr, old_value, new_value); | |
| 97 } | |
| 98 | |
| 99 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, | |
| 100 Atomic32 increment) { | |
| 101 Atomic32 value; | |
| 102 int reloop; | |
| 103 do { | |
| 104 // Equivalent to: | |
| 105 // | |
| 106 // value = LDREX(ptr) | |
| 107 // value += increment | |
| 108 // reloop = STREX(ptr, value) | |
| 109 // | |
| 110 __asm__ __volatile__(" ldrex %0, [%3]\n" | |
| 111 " add %0, %0, %4\n" | |
| 112 " strex %1, %0, [%3]\n" | |
| 113 : "=&r"(value), "=&r"(reloop), "+m"(*ptr) | |
| 114 : "r"(ptr), "r"(increment) | |
| 115 : "cc", "memory"); | |
| 116 } while (reloop); | |
| 117 return value; | |
| 118 } | |
| 119 | |
| 120 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, | |
| 121 Atomic32 increment) { | |
| 122 // TODO(digit): Investigate if it's possible to implement this with | |
| 123 // a single MemoryBarrier() operation between the LDREX and STREX. | |
| 124 // See http://crbug.com/246514 | |
| 125 MemoryBarrier(); | |
| 126 Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment); | |
| 127 MemoryBarrier(); | |
| 128 return result; | |
| 129 } | |
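
A hedged sketch of why the barrier variant matters (ReleaseRef and the destroy callback are invented for illustration): reference counting needs the decrement to be ordered against the writes made while the reference was held.

// Hypothetical helper, not part of this patch.
// Decrement a reference count; destroy() runs only for the last reference.
void ReleaseRef(volatile base::subtle::Atomic32* refcount, void (*destroy)()) {
  // Barrier_AtomicIncrement() returns the new value, so 0 means "last ref".
  if (base::subtle::Barrier_AtomicIncrement(refcount, -1) == 0)
    destroy();
}
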
| 130 | |
| 131 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, | |
| 132 Atomic32 new_value) { | |
| 133 Atomic32 old_value; | |
| 134 int reloop; | |
| 135 do { | |
| 136 // old_value = LDREX(ptr) | |
| 137 // reloop = STREX(ptr, new_value) | |
| 138 __asm__ __volatile__(" ldrex %0, [%3]\n" | |
| 139 " strex %1, %4, [%3]\n" | |
| 140 : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr) | |
| 141 : "r"(ptr), "r"(new_value) | |
| 142 : "cc", "memory"); | |
| 143 } while (reloop != 0); | |
| 144 return old_value; | |
| 145 } | |
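
A small usage sketch for the exchange operation (g_pending and TakePendingWork are hypothetical names): swapping in a sentinel lets exactly one caller claim whatever value was stored.

// Hypothetical pending-work flag, for illustration only.
base::subtle::Atomic32 g_pending = 0;

// Returns true if there was pending work, and atomically clears the flag.
bool TakePendingWork() {
  return base::subtle::NoBarrier_AtomicExchange(&g_pending, 0) != 0;
}
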
| 146 | |
| 147 #else | |
|
Mark Mentovai
2013/06/05 13:34:31
Is there a C preprocessor macro you can use to ind
digit1
2013/06/05 16:35:47
I've done that. Note that the checks must be more
| |
| 148 | |
| 149 // The kernel also provides a helper function to perform an atomic | |
| 150 // compare-and-swap operation at the hard-wired address 0xffff0fc0. | |
| 151 // On ARMv5, this is implemented by a special code path that the kernel | |
| 152 // detects and treats specially when thread pre-emption happens. | |
| 153 // On ARMv6 and higher, it uses LDREX/STREX instructions instead. | |
| 154 // | |
| 155 // Note that this always performs a full memory barrier, so there is no | |
| 156 // need to add calls to MemoryBarrier() before or after it. It also | |
| 157 // returns 0 on success and non-zero on failure. | |
| 158 // | |
| 159 // Available and reliable since Linux 2.6.24. Note that the first Android | |
| 160 // releases used 2.6.29, and ChromeOS is currently at 3.4, so this | |
|
Mark Mentovai
2013/06/05 13:34:31
“iirc” has no place in a comment. You have the lux
| |
| 161 // should only be a concern for people running _really_ old custom | |
| 162 // Linux/ARM distributions. | |
|
Mark Mentovai
2013/06/05 13:34:31
You’re ending a parenthetical that you never start
| |
| 163 typedef int (*LinuxKernelCmpxchgFunc)(Atomic32 old_value, | |
| 164 Atomic32 new_value, | |
| 165 volatile Atomic32* ptr); | |
| 166 LinuxKernelCmpxchgFunc pLinuxKernelCmpxchg __attribute__((weak)) = | |
|
Mark Mentovai
2013/06/05 13:34:31
Isn’t this saying that base::subtle::pLinuxKernelC
Mark Mentovai
2013/06/05 13:34:31
The p prefix is uncommon in Chrome code. variables
digit1
2013/06/05 16:35:47
Yes, this is part of the original code that was re
| |
| 167 (LinuxKernelCmpxchgFunc)0xffff0fc0; | |
|
Mark Mentovai
2013/06/05 13:34:31
The typedef and this pointer should both be in an
Mark Mentovai
2013/06/05 13:34:31
Make this const? You don’t want someone accidental
digit1
2013/06/05 16:35:47
Same here.
| |
| 168 | |
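
As a conceptual model only (the real helper lives at 0xffff0fc0, is implemented by the kernel, and also acts as a full memory barrier), the contract the code below relies on can be sketched like this:

// Pseudocode for the kernel cmpxchg contract; not an actual implementation.
int KernelCmpxchgModel(base::subtle::Atomic32 old_value,
                       base::subtle::Atomic32 new_value,
                       volatile base::subtle::Atomic32* ptr) {
  if (*ptr != old_value)
    return 1;         // Non-zero: no exchange happened, caller must retry.
  *ptr = new_value;   // In the real helper this step is atomic.
  return 0;           // Zero: the exchange succeeded.
}
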
| 169 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, | |
| 170 Atomic32 old_value, | |
| 171 Atomic32 new_value) { | |
| 172 Atomic32 prev_value; | |
| 173 for (;;) { | |
| 174 prev_value = *ptr; | |
| 175 if (prev_value != old_value) | |
| 176 return prev_value; | |
| 177 if (!pLinuxKernelCmpxchg(old_value, new_value, ptr)) | |
| 178 return old_value; | |
| 179 } | |
| 180 } | |
| 181 | |
| 182 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, | |
| 183 Atomic32 new_value) { | |
| 184 Atomic32 old_value; | |
| 185 do { | |
| 186 old_value = *ptr; | |
| 187 } while (pLinuxKernelCmpxchg(old_value, new_value, ptr)); | |
| 188 return old_value; | |
| 189 } | |
| 190 | |
| 191 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, | |
| 192 Atomic32 increment) { | |
| 193 return Barrier_AtomicIncrement(ptr, increment); | |
| 194 } | |
| 195 | |
| 196 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, | |
| 197 Atomic32 increment) { | |
| 198 for (;;) { | |
| 199 // Atomically exchange the old value with an incremented one. | |
| 200 Atomic32 old_value = *ptr; | |
| 201 Atomic32 new_value = old_value + increment; | |
| 202 if (!pLinuxKernelCmpxchg(old_value, new_value, ptr)) { | |
| 203 // The exchange took place as expected. | |
| 204 return new_value; | |
| 205 } | |
| 206 // Otherwise, *ptr changed mid-loop and we need to retry. | |
| 207 } | |
| 208 } | |
| 209 | |
| 210 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, | |
| 211 Atomic32 old_value, | |
| 212 Atomic32 new_value) { | |
| 213 Atomic32 prev_value; | |
| 214 for (;;) { | |
| 215 prev_value = *ptr; | |
| 216 if (prev_value != old_value) { | |
| 217 // Always ensure acquire semantics. | |
| 218 MemoryBarrier(); | |
| 219 return prev_value; | |
| 220 } | |
| 221 if (!pLinuxKernelCmpxchg(old_value, new_value, ptr)) | |
| 222 return old_value; | |
| 223 } | |
| 224 } | |
| 225 | |
| 226 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, | |
| 227 Atomic32 old_value, | |
| 228 Atomic32 new_value) { | |
| 229 // Use NoBarrier_CompareAndSwap(), because its implementation | |
| 230 // ensures that all stores happen through the kernel helper, | |
| 231 // which always implements a full barrier. | |
| 232 return NoBarrier_CompareAndSwap(ptr, old_value, new_value); | |
| 233 } | |
| 234 | |
| 235 #endif // __ARM_ARCH_6__ || __ARM_ARCH_7A__ | |
| 236 | |
| 237 // NOTE: Atomicity of the following load and store operations is only | |
| 238 // guaranteed when the |ptr| values are 32-bit aligned. | |
| 239 | |
| 240 inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { | |
| 241 *ptr = value; | |
| 242 } | |
| 243 | |
| 244 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { | |
| 245 *ptr = value; | |
| 246 MemoryBarrier(); | |
| 247 } | |
| 248 | |
| 249 inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { | |
| 250 MemoryBarrier(); | |
| 251 *ptr = value; | |
| 252 } | |
| 253 | |
| 254 inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; } | |
| 255 | |
| 256 inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { | |
| 257 Atomic32 value = *ptr; | |
| 258 MemoryBarrier(); | |
| 259 return value; | |
| 260 } | |
| 261 | |
| 262 inline Atomic32 Release_Load(volatile const Atomic32* ptr) { | |
| 263 MemoryBarrier(); | |
| 264 return *ptr; | |
| 265 } | |
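
A hedged sketch of the usual acquire/release pairing built on the loads and stores above (g_message and g_message_ready are invented names): a writer publishes data with Release_Store() and a reader observes it with Acquire_Load().

// Invented names for illustration; not part of the reviewed file.
base::subtle::Atomic32 g_message = 0;
base::subtle::Atomic32 g_message_ready = 0;

void Publish(base::subtle::Atomic32 value) {
  base::subtle::NoBarrier_Store(&g_message, value);
  base::subtle::Release_Store(&g_message_ready, 1);  // Data visible first.
}

bool TryConsume(base::subtle::Atomic32* out) {
  if (base::subtle::Acquire_Load(&g_message_ready) == 0)
    return false;  // Nothing published yet.
  *out = base::subtle::NoBarrier_Load(&g_message);  // Sees the published data.
  return true;
}
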
| 266 | |
| 267 #undef LINUX_ARM_KERNEL_MEMORY_BARRIER | |
| 268 | |
| 269 } // namespace base::subtle | |
| 270 } // namespace base | |
| 271 | |
| 272 #endif // BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_ | |