// Copyright 2010 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation, use atomicops.h instead.
//
// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.

#ifndef V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
#define V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_

#if defined(__QNXNTO__)
#include <sys/cpuinline.h>
#endif

namespace v8 {
namespace base {

// Memory barriers on ARM are funky, but the kernel is here to help:
//
// * ARMv5 did not support SMP; there is no memory barrier instruction at
//   all on this architecture, or when targeting its machine code.
//
// * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by
//   writing a random value to a very specific coprocessor register.
//
// * On ARMv7, the "dmb" instruction is used to perform a full memory
//   barrier (though writing to the coprocessor will still work).
//   However, on single-core devices (e.g. Nexus One or Nexus S),
//   this instruction can take up to 200 ns, which is huge, even though
//   it is completely unneeded on these devices.
//
// * There is no easy way to determine at runtime whether the device is
//   single- or multi-core. However, the kernel provides a helper
//   function at a fixed memory address (0xffff0fa0) which always
//   performs a memory barrier in the most efficient way: on single-core
//   devices it is an empty function that returns immediately, while on
//   multi-core devices it implements a full memory barrier.
//
// * This source could be compiled to ARMv5 machine code that runs on a
//   multi-core ARMv6 or ARMv7 device. In this case, memory barriers
//   are needed for correct execution. Always call the kernel helper, even
//   when targeting ARMv5TE.
//

inline void MemoryBarrier() {
#if defined(__ANDROID__)
  // Note: This is a function call, which is also an implicit compiler barrier.
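  // 0xffff0fa0 is the Linux ARM "kuser" helper __kernel_memory_barrier,
  // which the kernel maps into every user process at a fixed address.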
  typedef void (*KernelMemoryBarrierFunc)();
  ((KernelMemoryBarrierFunc)0xffff0fa0)();
#elif defined(__QNXNTO__)
  __cpu_membarrier();
#else
  // Fall back to the GCC built-in.
  __sync_synchronize();
#endif
}

// An ARM toolchain would only define one of these depending on which
// variant of the target architecture is being used. This tests against
// any known ARMv6, ARMv7 or ARMv8 variant, where it is possible to directly
// use ldrex/strex instructions to implement fast atomic operations.
#if defined(__ARM_ARCH_8A__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
    defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
    defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__)

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  int reloop;
  do {
    // The following is equivalent to:
    //
    //   prev_value = LDREX(ptr)
    //   reloop = 0
    //   if (prev_value != old_value)
    //      reloop = STREX(ptr, new_value)
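    // In the asm below, "=&r" marks |prev_value| and |reloop| as early-clobber
    // outputs so GCC never assigns them to the same registers as the inputs,
    // and the "memory" clobber keeps the compiler from caching *ptr across
    // the statement.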
    __asm__ __volatile__("  ldrex %0, [%3]\n"
                         "  mov %1, #0\n"
                         "  cmp %0, %4\n"
#ifdef __thumb2__
                         "  it eq\n"
#endif
                         "  strexeq %1, %5, [%3]\n"
                         : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(old_value), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return prev_value;
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  MemoryBarrier();
  return result;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  MemoryBarrier();
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 value;
  int reloop;
  do {
    // Equivalent to:
    //
    //   value = LDREX(ptr)
    //   value += increment
    //   reloop = STREX(ptr, value)
    //
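    // STREX writes 0 to |reloop| when the store succeeds and 1 when the
    // exclusive monitor was cleared in the meantime, in which case the loop
    // simply retries.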
    __asm__ __volatile__("  ldrex %0, [%3]\n"
                         "  add %0, %0, %4\n"
                         "  strex %1, %0, [%3]\n"
                         : "=&r"(value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(increment)
                         : "cc", "memory");
  } while (reloop);
  return value;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  // TODO(digit): Investigate if it's possible to implement this with
  // a single MemoryBarrier() operation between the LDREX and STREX.
  // See http://crbug.com/246514
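  // For now, a barrier on each side makes the increment both a release
  // (earlier accesses are ordered before it) and an acquire (later accesses
  // are ordered after it).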
  MemoryBarrier();
  Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
  MemoryBarrier();
  return result;
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  int reloop;
  do {
    // old_value = LDREX(ptr)
    // reloop = STREX(ptr, new_value)
    __asm__ __volatile__("  ldrex %0, [%3]\n"
                         "  strex %1, %4, [%3]\n"
                         : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return old_value;
}

// This tests against any known ARMv5 variant.
#elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
      defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)

// The kernel also provides a helper function to perform an atomic
// compare-and-swap operation at the hard-wired address 0xffff0fc0.
// On ARMv5, this is implemented by a special code path that the kernel
// detects and treats specially when thread pre-emption happens.
// On ARMv6 and higher, it uses LDREX/STREX instructions instead.
//
// Note that this always performs a full memory barrier, so there is no
// need to add calls to MemoryBarrier() before or after it. It also
// returns 0 on success and 1 on failure.
//
// Available and reliable since Linux 2.6.24. Both Android and ChromeOS
// use newer kernel revisions, so this should not be a concern.
namespace {

inline int LinuxKernelCmpxchg(Atomic32 old_value,
                              Atomic32 new_value,
                              volatile Atomic32* ptr) {
  typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*);
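  // 0xffff0fc0 is the kernel's __kernel_cmpxchg user helper; it returns 0
  // when the swap was performed and nonzero when it was not.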
  return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
}

}  // namespace

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
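  // The kernel helper only reports success or failure, not the value it
  // observed, so on failure re-read *ptr: return it if it no longer equals
  // |old_value|, otherwise retry the swap.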
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value)
      return prev_value;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  do {
    old_value = *ptr;
  } while (LinuxKernelCmpxchg(old_value, new_value, ptr));
  return old_value;
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  for (;;) {
    // Atomically exchange the old value with an incremented one.
    Atomic32 old_value = *ptr;
    Atomic32 new_value = old_value + increment;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
      // The exchange took place as expected.
      return new_value;
    }
    // Otherwise, *ptr changed mid-loop and we need to retry.
  }
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value) {
      // Always ensure acquire semantics.
      MemoryBarrier();
      return prev_value;
    }
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  // This could be implemented as:
  //    MemoryBarrier();
  //    return NoBarrier_CompareAndSwap();
  //
  // But that would use three barriers per successful CAS. For better
  // performance, use Acquire_CompareAndSwap(), whose implementation
  // guarantees that:
  // - A successful swap uses only two barriers (in the kernel helper).
  // - An early return due to (prev_value != old_value) performs
  //   a memory barrier with no store, which is equivalent to the
  //   generic implementation above.
  return Acquire_CompareAndSwap(ptr, old_value, new_value);
}

#else
# error "Your CPU's ARM architecture is not supported yet"
#endif

// NOTE: Atomicity of the following load and store operations is only
// guaranteed when |ptr| values are 32-bit aligned.
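// (Aligned 32-bit loads and stores are single-copy atomic on ARM; unaligned
// accesses may be split into multiple bus transactions.)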

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  MemoryBarrier();
  *ptr = value;
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;
  MemoryBarrier();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}

// Byte accessors.

inline void NoBarrier_Store(volatile Atomic8* ptr, Atomic8 value) {
  *ptr = value;
}

inline Atomic8 NoBarrier_Load(volatile const Atomic8* ptr) { return *ptr; }

}  // namespace base
}  // namespace v8

#endif  // V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_