Chromium Code Reviews

Side by Side Diff: base/atomicops_internals_arm_gcc.h

Issue 16335007: Improve the implementation of atomic operations on Linux/ARM (including Android/ARM). (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Fix Acquire_CompareAndSwap() for ARMv5 Created 7 years, 6 months ago
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // This file is an internal atomic implementation, use base/atomicops.h instead.
6 //
7 // LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.
8
9 #ifndef BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
10 #define BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
11
12 namespace base {
13 namespace subtle {
14
15 // Memory barriers on ARM are funky, but the kernel is here to help:
Mark Mentovai 2013/06/05 13:34:31 Thanks for this comment!
16 //
17 // * ARMv5 didn't support SMP; there is no memory barrier instruction at
18 // all on this architecture, or when targetting its machine code.
Mark Mentovai 2013/06/05 13:34:31 targeting
digit1 2013/06/05 16:35:47 Done.
19 //
20 // * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by
21 // writing a random value to a very specific coprocessor register.
22 //
23 // * On ARMv7, the "dmb" instruction is used to perform a full memory
24 // barrier (though writing to the coprocessor will still work).
25 // However, on single-core devices (e.g. Nexus One or Nexus S),
26 // this instruction will take up to 200 ns, which is huge, even though
27 // it's completely unneeded on these devices.
28 //
29 // * There is no easy way to determine at runtime if the device is
30 // single or multi-core. However, the kernel provide a useful helper
Mark Mentovai 2013/06/05 13:34:31 provides
digit1 2013/06/05 16:35:47 Done.
31 // function at a fixed memory address (0xffff0fa0), which will always
32 // perform a memory barrier in the most efficient way. I.e. on single
33 // core devices, this is an empty function that exits immediately.
34 // On multi-core devices, it implements a full memory barrier.
35 //
36 // Note that this helper function doesn't modify any register or memory.
37 // See the comment in Barrier_AtomicIncrement() to see why it is
Mark Mentovai 2013/06/05 13:34:31 There are two versions of Barrier_AtomicIncrement() in this file; which one does this comment refer to?
digit1 2013/06/05 16:35:47 This comment is obsolete (it was only useful for a previous version of this patch).
38 // important.
39 //
40 // * This source could be compiled to ARMv5 machine code that runs on a
41 // multi-core ARMv6 or ARMv7 device. In this case, memory barriers
42 // are needed for correct execution. Always call the kernel helper, even
43 // when targeting ARMv5TE.
44 //
45
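For reference, the two instruction-level barrier forms mentioned in the comment above could be written as follows. This is a hypothetical sketch for illustration only; the code below deliberately calls the kernel helper instead, for the reasons the comment explains.

  inline void DirectMemoryBarrier() {  // hypothetical, not part of this patch
  #if defined(__ARM_ARCH_7A__)
    // ARMv7: dedicated data memory barrier instruction.
    __asm__ __volatile__("dmb" ::: "memory");
  #else
    // ARMv6: write an arbitrary value to the CP15 c7/c10/5 (Data Memory
    // Barrier) coprocessor register.
    __asm__ __volatile__("mcr p15, 0, %0, c7, c10, 5" : : "r"(0) : "memory");
  #endif
  }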
46 #define LINUX_ARM_KERNEL_MEMORY_BARRIER 0xffff0fa0
Mark Mentovai 2013/06/05 13:34:31 You can avoid the macro entirely (and thus the need to #undef it at the end of the file).
digit1 2013/06/05 16:35:47 Yes, again, this came from the previous patch, where the macro was already used.
47
48 inline void MemoryBarrier() {
Mark Mentovai 2013/06/05 13:34:31 Make this static or put it in an unnamed namespace
digit1 2013/06/05 16:35:47 Unfortunately, the function is declared in base/atomicops.h, so it cannot be static or placed in an unnamed namespace.
49 // Note: This is a function call, which is also an implicit compiler
50 // barrier.
51 ((void (*)(void))LINUX_ARM_KERNEL_MEMORY_BARRIER)();
Mark Mentovai 2013/06/05 13:34:31 This is C++, the second void is unnecessary, it can just be (void (*)()).
digit1 2013/06/05 16:35:47 Done.
52 }
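Taken together, the review suggestions above (drop the second void from the cast, and drop the macro so there is nothing to #undef later) might look roughly like this; a sketch only, not necessarily what the next patch set does:

  inline void MemoryBarrier() {
    // Note: This is a function call, which is also an implicit compiler
    // barrier. 0xffff0fa0 is the fixed address of the Linux/ARM kernel
    // barrier helper, so no macro is needed.
    ((void (*)())0xffff0fa0)();
  }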
53
54 #if defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_6__)
55
56 // On ARMv6 and higher, it is possible to use the ldrex/strex instructions
57 // directly to implement fast atomic operations.
58 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
59 Atomic32 old_value,
60 Atomic32 new_value) {
61 Atomic32 prev_value;
62 int reloop;
63 do {
64 // The following is equivalent to:
65 //
66 // prev_value = LDREX(ptr)
67 // reloop = 0
68 // if (prev_value != old_value)
69 // reloop = STREX(ptr, new_value)
70 __asm__ __volatile__(" ldrex %0, [%3]\n"
71 " mov %1, #0\n"
72 " teq %0, %4\n"
73 #ifdef __thumb2__
74 " it eq\n"
75 #endif
76 " strexeq %1, %5, [%3]\n"
77 : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
78 : "r"(ptr), "r"(old_value), "r"(new_value)
79 : "cc", "memory");
80 } while (reloop != 0);
81 return prev_value;
82 }
83
84 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
85 Atomic32 old_value,
86 Atomic32 new_value) {
87 Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
88 MemoryBarrier();
89 return result;
90 }
91
92 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
93 Atomic32 old_value,
94 Atomic32 new_value) {
95 MemoryBarrier();
96 return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
97 }
98
99 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
100 Atomic32 increment) {
101 Atomic32 value;
102 int reloop;
103 do {
104 // Equivalent to:
105 //
106 // value = LDREX(ptr)
107 // value += increment
108 // reloop = STREX(ptr, value)
109 //
110 __asm__ __volatile__(" ldrex %0, [%3]\n"
111 " add %0, %0, %4\n"
112 " strex %1, %0, [%3]\n"
113 : "=&r"(value), "=&r"(reloop), "+m"(*ptr)
114 : "r"(ptr), "r"(increment)
115 : "cc", "memory");
116 } while (reloop);
117 return value;
118 }
119
120 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
121 Atomic32 increment) {
122 // TODO(digit): Investigate if it's possible to implement this with
123 // a single MemoryBarrier() operation between the LDREX and STREX.
124 // See http://crbug.com/246514
125 MemoryBarrier();
126 Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
127 MemoryBarrier();
128 return result;
129 }
130
131 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
132 Atomic32 new_value) {
133 Atomic32 old_value;
134 int reloop;
135 do {
136 // old_value = LDREX(ptr)
137 // reloop = STREX(ptr, new_value)
138 __asm__ __volatile__(" ldrex %0, [%3]\n"
139 " strex %1, %4, [%3]\n"
140 : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
141 : "r"(ptr), "r"(new_value)
142 : "cc", "memory");
143 } while (reloop != 0);
144 return old_value;
145 }
146
147 #else
Mark Mentovai 2013/06/05 13:34:31 Is there a C preprocessor macro you can use to indicate whether the target architecture supports LDREX/STREX?
digit1 2013/06/05 16:35:47 I've done that. Note that the checks must be more elaborate than a single test, because GCC defines a separate __ARM_ARCH_* macro for each architecture revision.
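For illustration, a broader compile-time guard along those lines might enumerate GCC's per-revision architecture macros; the exact set listed here is an assumption, not necessarily what a later patch set uses:

  #if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
      defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
      defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
      defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
      defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__)
  // LDREX/STREX are available: use the inline-assembly implementations above.
  #else
  // Otherwise fall back to the kernel cmpxchg helper below.
  #endif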
148
149 // The kernel also provides a helper function to perform an atomic
150 // compare-and-swap operation at the hard-wired address 0xffff0fc0.
151 // On ARMv5, this is implemented by a special code path that the kernel
152 // detects and treats specially when thread pre-emption happens.
153 // On ARMv6 and higher, it uses LDREX/STREX instructions instead.
154 //
155 // Note that this always performs a full memory barrier, so there is no
156 // need to add calls to MemoryBarrier() before or after it. It also
157 // returns 0 on success, and non-zero on failure.
158 //
159 // Available and reliable since Linux 2.6.24. Note that the first Android
160 // releases used 2.6.29, and ChromeOS is currently at 3.4, iirc, so this
Mark Mentovai 2013/06/05 13:34:31 “iirc” has no place in a comment. You have the luxury of checking the facts before writing them down.
161 // should only be a concern for people running _really_ old custom
162 // Linux/ARM distributions).
Mark Mentovai 2013/06/05 13:34:31 You’re ending a parenthetical that you never started.
163 typedef int (*LinuxKernelCmpxchgFunc)(Atomic32 old_value,
164 Atomic32 new_value,
165 volatile Atomic32* ptr);
166 LinuxKernelCmpxchgFunc pLinuxKernelCmpxchg __attribute__((weak)) =
Mark Mentovai 2013/06/05 13:34:31 Isn’t this saying that base::subtle::pLinuxKernelCmpxchg is a weak symbol?
Mark Mentovai 2013/06/05 13:34:31 The p prefix is uncommon in Chrome code. Variables generally don't use Hungarian-style prefixes.
digit1 2013/06/05 16:35:47 Yes, this is part of the original code that was reused here.
167 (LinuxKernelCmpxchgFunc)0xffff0fc0;
Mark Mentovai 2013/06/05 13:34:31 The typedef and this pointer should both be in an unnamed namespace.
Mark Mentovai 2013/06/05 13:34:31 Make this const? You don’t want someone accidentally reassigning it.
digit1 2013/06/05 16:35:47 Same here.
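As a sketch of the reviewers' suggestions above (unnamed namespace, const pointer, no Hungarian prefix), the declaration could look like the following. Whether the weak attribute is still needed is the separate question raised above; this is illustrative, not the code the patch ships:

  namespace {

  typedef int (*LinuxKernelCmpxchgFunc)(Atomic32 old_value,
                                        Atomic32 new_value,
                                        volatile Atomic32* ptr);

  // 0xffff0fc0 is the fixed address of the Linux/ARM kernel cmpxchg helper.
  LinuxKernelCmpxchgFunc const linux_kernel_cmpxchg =
      reinterpret_cast<LinuxKernelCmpxchgFunc>(0xffff0fc0);

  }  // namespace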
168
169 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
170 Atomic32 old_value,
171 Atomic32 new_value) {
172 Atomic32 prev_value;
173 for (;;) {
174 prev_value = *ptr;
175 if (prev_value != old_value)
176 return prev_value;
177 if (!pLinuxKernelCmpxchg(old_value, new_value, ptr))
178 return old_value;
179 }
180 }
181
182 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
183 Atomic32 new_value) {
184 Atomic32 old_value;
185 do {
186 old_value = *ptr;
187 } while (pLinuxKernelCmpxchg(old_value, new_value, ptr));
188 return old_value;
189 }
190
191 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
192 Atomic32 increment) {
193 return Barrier_AtomicIncrement(ptr, increment);
194 }
195
196 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
197 Atomic32 increment) {
198 for (;;) {
199 // Atomically exchange the old value with an incremented one.
200 Atomic32 old_value = *ptr;
201 Atomic32 new_value = old_value + increment;
202 if (!pLinuxKernelCmpxchg(old_value, new_value, ptr)) {
203 // The exchange took place as expected.
204 return new_value;
205 }
206 // Otherwise, *ptr changed mid-loop and we need to retry.
207 }
208 }
209
210 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
211 Atomic32 old_value,
212 Atomic32 new_value) {
213 Atomic32 prev_value;
214 for (;;) {
215 prev_value = *ptr;
216 if (prev_value != old_value) {
217 // Always ensure acquire semantics.
218 MemoryBarrier();
219 return prev_value;
220 }
221 if (!pLinuxKernelCmpxchg(old_value, new_value, ptr))
222 return old_value;
223 }
224 }
225
226 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
227 Atomic32 old_value,
228 Atomic32 new_value) {
229 // Use NoBarrier_CompareAndSwap(), because its implementation
230 // ensures that all stores happen through the kernel helper,
231 // which always implements a full barrier.
232 return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
233 }
234
235 #endif // __ARM_ARCH_6__ || __ARM_ARCH_7A__
236
237 // NOTE: Atomicity of the following load and store operations is only
238 // guaranteed when the |ptr| values are 32-bit aligned.
239
240 inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
241 *ptr = value;
242 }
243
244 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
245 *ptr = value;
246 MemoryBarrier();
247 }
248
249 inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
250 MemoryBarrier();
251 *ptr = value;
252 }
253
254 inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }
255
256 inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
257 Atomic32 value = *ptr;
258 MemoryBarrier();
259 return value;
260 }
261
262 inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
263 MemoryBarrier();
264 return *ptr;
265 }
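As a usage sketch (hypothetical caller code, not part of this header; DoSomethingWith() is made up for the example), a writer can publish data with Release_Store() and a reader can consume it with Acquire_Load(), relying on the barriers these wrappers insert:

  #include "base/atomicops.h"

  void DoSomethingWith(int value);  // hypothetical consumer

  base::subtle::Atomic32 g_ready = 0;
  int g_shared_data = 0;

  void Writer() {
    g_shared_data = 42;                        // plain store
    base::subtle::Release_Store(&g_ready, 1);  // barrier, then set the flag
  }

  void Reader() {
    if (base::subtle::Acquire_Load(&g_ready))  // read the flag, then barrier
      DoSomethingWith(g_shared_data);          // safe: writer's store is visible
  }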
266
267 #undef LINUX_ARM_KERNEL_MEMORY_BARRIER
268
269 } // namespace base::subtle
270 } // namespace base
271
272 #endif // BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_