Chromium Code Reviews
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // This file is an internal atomic implementation, use base/atomicops.h instead. | |
| 6 // | |
| 7 // LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears. | |
| 8 | |
| 9 #ifndef BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_ | |
| 10 #define BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_ | |
| 11 | |
| 12 namespace base { | |
| 13 namespace subtle { | |
| 14 | |
| 15 // Memory barriers on ARM are funky, but the kernel is here to help: | |
|
Mark Mentovai
2013/06/05 13:34:31
Thanks for this comment!
| |
| 16 // | |
| 17 // * ARMv5 didn't support SMP; there is no memory barrier instruction at | |
| 18 // all on this architecture, or when targeting its machine code. | |
|
Mark Mentovai
2013/06/05 13:34:31
targeting
digit1
2013/06/05 16:35:47
Done.
| |
| 19 // | |
| 20 // * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by | |
| 21 // writing a random value to a very specific coprocessor register. | |
| 22 // | |
| 23 // * On ARMv7, the "dmb" instruction is used to perform a full memory | |
| 24 // barrier (though writing to the co-processor will still work). | |
| 25 // However, on single-core devices (e.g. Nexus One or Nexus S), | |
| 26 // this instruction will take up to 200 ns, which is huge, even though | |
| 27 // it's completely unnecessary on these devices. | |
| 28 // | |
| 29 // * There is no easy way to determine at runtime if the device is | |
| 30 // single or multi-core. However, the kernel provides a useful helper | |
|
Mark Mentovai
2013/06/05 13:34:31
provides
digit1
2013/06/05 16:35:47
Done.
| |
| 31 // function at a fixed memory address (0xffff0fa0), which will always | |
| 32 // perform a memory barrier in the most efficient way. On single-core | |
| 33 // devices, this is an empty function that returns immediately. | |
| 34 // On multi-core devices, it implements a full memory barrier. | |
| 35 // | |
| 36 // Note that this helper function doesn't modify any register or memory. | |
| 37 // See the comment in Barrier_AtomicIncrement() for why this is | |
|
Mark Mentovai
2013/06/05 13:34:31
There are two versions of Barrier_AtomicIncrement(
digit1
2013/06/05 16:35:47
This comment is obsolete (it was only useful for a
| |
| 38 // important. | |
| 39 // | |
| 40 // * This source could be compiled to ARMv5 machine code that runs on a | |
| 41 // multi-core ARMv6 or ARMv7 device. In this case, memory barriers | |
| 42 // are needed for correct execution. Always call the kernel helper, even | |
| 43 // when targeting ARMv5TE. | |
| 44 // | |
| 45 | |
| 46 #define LINUX_ARM_KERNEL_MEMORY_BARRIER 0xffff0fa0 | |
|
Mark Mentovai
2013/06/05 13:34:31
You can avoid the macro entirely (and thus the nee
digit1
2013/06/05 16:35:47
Yes, again, this came from the previous patch were
| |
| 47 | |
| 48 inline void MemoryBarrier() { | |
|
Mark Mentovai
2013/06/05 13:34:31
Make this static or put it in an unnamed namespace
digit1
2013/06/05 16:35:47
Unfortunately, the function is declared in base/at
| |
| 49 // Note: This is a function call, which is also an implicit compiler | |
| 50 // barrier. | |
| 51 ((void (*)(void))LINUX_ARM_KERNEL_MEMORY_BARRIER)(); | |
|
Mark Mentovai
2013/06/05 13:34:31
This is C++, the second void is unnecessary, it ca
digit1
2013/06/05 16:35:47
Done.
| |
| 52 } | |
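
A hedged usage sketch (not part of this patch): MemoryBarrier() is a full fence, so it can order a data write before a flag write between threads. The g_payload/g_ready names below are invented for illustration and assume the declarations from base/atomicops.h.

#include "base/atomicops.h"

// Illustrative globals, not part of the reviewed file.
base::subtle::Atomic32 g_payload = 0;
base::subtle::Atomic32 g_ready = 0;

void Producer() {
  base::subtle::NoBarrier_Store(&g_payload, 42);
  base::subtle::MemoryBarrier();               // Publish the payload...
  base::subtle::NoBarrier_Store(&g_ready, 1);  // ...before setting the flag.
}

base::subtle::Atomic32 Consumer() {
  while (base::subtle::NoBarrier_Load(&g_ready) == 0) {}
  base::subtle::MemoryBarrier();  // Pairs with the producer's barrier.
  return base::subtle::NoBarrier_Load(&g_payload);  // Observes 42.
}
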
| 53 | |
| 54 #if defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_6__) | |
| 55 | |
| 56 // On ARMv6 and higher, it is possible to use the ldrex/strex | |
| 57 // instructions to implement fast atomic operations directly. | |
| 58 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, | |
| 59 Atomic32 old_value, | |
| 60 Atomic32 new_value) { | |
| 61 Atomic32 prev_value; | |
| 62 int reloop; | |
| 63 do { | |
| 64 // The following is equivalent to: | |
| 65 // | |
| 66 // prev_value = LDREX(ptr) | |
| 67 // reloop = 0 | |
| 68 // if (prev_value == old_value) | |
| 69 // reloop = STREX(ptr, new_value) | |
| 70 __asm__ __volatile__(" ldrex %0, [%3]\n" | |
| 71 " mov %1, #0\n" | |
| 72 " teq %0, %4\n" | |
| 73 #ifdef __thumb2__ | |
| 74 " it eq\n" | |
| 75 #endif | |
| 76 " strexeq %1, %5, [%3]\n" | |
| 77 : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr) | |
| 78 : "r"(ptr), "r"(old_value), "r"(new_value) | |
| 79 : "cc", "memory"); | |
| 80 } while (reloop != 0); | |
| 81 return prev_value; | |
| 82 } | |
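
For readers unfamiliar with compare-and-swap, a minimal usage sketch follows (kUnset, kClaimed, and g_state are hypothetical names, not part of this patch): the function returns the previous value, so a caller knows whether its swap took effect.

// Hypothetical one-shot claim flag, for illustration only.
const base::subtle::Atomic32 kUnset = 0;
const base::subtle::Atomic32 kClaimed = 1;
base::subtle::Atomic32 g_state = kUnset;

// Returns true only for the single thread that flips kUnset -> kClaimed.
bool TryClaim() {
  return base::subtle::NoBarrier_CompareAndSwap(&g_state, kUnset, kClaimed) ==
         kUnset;
}
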
| 83 | |
| 84 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, | |
| 85 Atomic32 old_value, | |
| 86 Atomic32 new_value) { | |
| 87 Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value); | |
| 88 MemoryBarrier(); | |
| 89 return result; | |
| 90 } | |
| 91 | |
| 92 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, | |
| 93 Atomic32 old_value, | |
| 94 Atomic32 new_value) { | |
| 95 MemoryBarrier(); | |
| 96 return NoBarrier_CompareAndSwap(ptr, old_value, new_value); | |
| 97 } | |
| 98 | |
| 99 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, | |
| 100 Atomic32 increment) { | |
| 101 Atomic32 value; | |
| 102 int reloop; | |
| 103 do { | |
| 104 // Equivalent to: | |
| 105 // | |
| 106 // value = LDREX(ptr) | |
| 107 // value += increment | |
| 108 // reloop = STREX(ptr, value) | |
| 109 // | |
| 110 __asm__ __volatile__(" ldrex %0, [%3]\n" | |
| 111 " add %0, %0, %4\n" | |
| 112 " strex %1, %0, [%3]\n" | |
| 113 : "=&r"(value), "=&r"(reloop), "+m"(*ptr) | |
| 114 : "r"(ptr), "r"(increment) | |
| 115 : "cc", "memory"); | |
| 116 } while (reloop); | |
| 117 return value; | |
| 118 } | |
| 119 | |
| 120 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, | |
| 121 Atomic32 increment) { | |
| 122 // TODO(digit): Investigate if it's possible to implement this with | |
| 123 // a single MemoryBarrier() operation between the LDREX and STREX. | |
| 124 // See http://crbug.com/246514 | |
| 125 MemoryBarrier(); | |
| 126 Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment); | |
| 127 MemoryBarrier(); | |
| 128 return result; | |
| 129 } | |
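
A hedged sketch of why the barrier variant matters (ReleaseRef and the destroy callback are invented for illustration): reference counting needs the decrement to be ordered against the writes made while the reference was held.

// Hypothetical helper, not part of this patch.
// Decrement a reference count; destroy() runs only for the last reference.
void ReleaseRef(volatile base::subtle::Atomic32* refcount, void (*destroy)()) {
  // Barrier_AtomicIncrement() returns the new value, so 0 means "last ref".
  if (base::subtle::Barrier_AtomicIncrement(refcount, -1) == 0)
    destroy();
}
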
| 130 | |
| 131 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, | |
| 132 Atomic32 new_value) { | |
| 133 Atomic32 old_value; | |
| 134 int reloop; | |
| 135 do { | |
| 136 // old_value = LDREX(ptr) | |
| 137 // reloop = STREX(ptr, new_value) | |
| 138 __asm__ __volatile__(" ldrex %0, [%3]\n" | |
| 139 " strex %1, %4, [%3]\n" | |
| 140 : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr) | |
| 141 : "r"(ptr), "r"(new_value) | |
| 142 : "cc", "memory"); | |
| 143 } while (reloop != 0); | |
| 144 return old_value; | |
| 145 } | |
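
A small usage sketch for the exchange operation (g_pending and TakePendingWork are hypothetical names): swapping in a sentinel lets exactly one caller claim whatever value was stored.

// Hypothetical pending-work flag, for illustration only.
base::subtle::Atomic32 g_pending = 0;

// Returns true if there was pending work, and atomically clears the flag.
bool TakePendingWork() {
  return base::subtle::NoBarrier_AtomicExchange(&g_pending, 0) != 0;
}
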
| 146 | |
| 147 #else | |
|
Mark Mentovai
2013/06/05 13:34:31
Is there a C preprocessor macro you can use to ind
digit1
2013/06/05 16:35:47
I've done that. Note that the checks must be more
| |
| 148 | |
| 149 // The kernel also provides a helper function to perform an atomic | |
| 150 // compare-and-swap operation at the hard-wired address 0xffff0fc0. | |
| 151 // On ARMv5, this is implemented by a special code path that the kernel | |
| 152 // detects and treats specially when thread pre-emption happens. | |
| 153 // On ARMv6 and higher, it uses LDREX/STREX instructions instead. | |
| 154 // | |
| 155 // Note that this always performs a full memory barrier, so there is no | |
| 156 // need to add calls to MemoryBarrier() before or after it. It also | |
| 157 // returns 0 on success and non-zero on failure. | |
| 158 // | |
| 159 // Available and reliable since Linux 2.6.24. Note that the first Android | |
| 160 // releases used 2.6.29, and ChromeOS is currently at 3.4, so this | |
|
Mark Mentovai
2013/06/05 13:34:31
“iirc” has no place in a comment. You have the lux
| |
| 161 // should only be a concern for people running _really_ old custom | |
| 162 // Linux/ARM distributions. | |
|
Mark Mentovai
2013/06/05 13:34:31
You’re ending a parenthetical that you never start
| |
| 163 typedef int (*LinuxKernelCmpxchgFunc)(Atomic32 old_value, | |
| 164 Atomic32 new_value, | |
| 165 volatile Atomic32* ptr); | |
| 166 LinuxKernelCmpxchgFunc pLinuxKernelCmpxchg __attribute__((weak)) = | |
|
Mark Mentovai
2013/06/05 13:34:31
Isn’t this saying that base::subtle::pLinuxKernelC
Mark Mentovai
2013/06/05 13:34:31
The p prefix is uncommon in Chrome code. variables
digit1
2013/06/05 16:35:47
Yes, this is part of the original code that was re
| |
| 167 (LinuxKernelCmpxchgFunc)0xffff0fc0; | |
|
Mark Mentovai
2013/06/05 13:34:31
The typedef and this pointer should both be in an
Mark Mentovai
2013/06/05 13:34:31
Make this const? You don’t want someone accidental
digit1
2013/06/05 16:35:47
Same here.
| |
| 168 | |
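
As a conceptual model only (the real helper lives at 0xffff0fc0, is implemented by the kernel, and also acts as a full memory barrier), the contract the code below relies on can be sketched like this:

// Pseudocode for the kernel cmpxchg contract; not an actual implementation.
int KernelCmpxchgModel(base::subtle::Atomic32 old_value,
                       base::subtle::Atomic32 new_value,
                       volatile base::subtle::Atomic32* ptr) {
  if (*ptr != old_value)
    return 1;         // Non-zero: no exchange happened, caller must retry.
  *ptr = new_value;   // In the real helper this step is atomic.
  return 0;           // Zero: the exchange succeeded.
}
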
| 169 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, | |
| 170 Atomic32 old_value, | |
| 171 Atomic32 new_value) { | |
| 172 Atomic32 prev_value; | |
| 173 for (;;) { | |
| 174 prev_value = *ptr; | |
| 175 if (prev_value != old_value) | |
| 176 return prev_value; | |
| 177 if (!pLinuxKernelCmpxchg(old_value, new_value, ptr)) | |
| 178 return old_value; | |
| 179 } | |
| 180 } | |
| 181 | |
| 182 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, | |
| 183 Atomic32 new_value) { | |
| 184 Atomic32 old_value; | |
| 185 do { | |
| 186 old_value = *ptr; | |
| 187 } while (pLinuxKernelCmpxchg(old_value, new_value, ptr)); | |
| 188 return old_value; | |
| 189 } | |
| 190 | |
| 191 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, | |
| 192 Atomic32 increment) { | |
| 193 return Barrier_AtomicIncrement(ptr, increment); | |
| 194 } | |
| 195 | |
| 196 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, | |
| 197 Atomic32 increment) { | |
| 198 for (;;) { | |
| 199 // Atomically exchange the old value with an incremented one. | |
| 200 Atomic32 old_value = *ptr; | |
| 201 Atomic32 new_value = old_value + increment; | |
| 202 if (!pLinuxKernelCmpxchg(old_value, new_value, ptr)) { | |
| 203 // The exchange took place as expected. | |
| 204 return new_value; | |
| 205 } | |
| 206 // Otherwise, *ptr changed mid-loop and we need to retry. | |
| 207 } | |
| 208 } | |
| 209 | |
| 210 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, | |
| 211 Atomic32 old_value, | |
| 212 Atomic32 new_value) { | |
| 213 Atomic32 prev_value; | |
| 214 for (;;) { | |
| 215 prev_value = *ptr; | |
| 216 if (prev_value != old_value) { | |
| 217 // Always ensure acquire semantics. | |
| 218 MemoryBarrier(); | |
| 219 return prev_value; | |
| 220 } | |
| 221 if (!pLinuxKernelCmpxchg(old_value, new_value, ptr)) | |
| 222 return old_value; | |
| 223 } | |
| 224 } | |
| 225 | |
| 226 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, | |
| 227 Atomic32 old_value, | |
| 228 Atomic32 new_value) { | |
| 229 // Use NoBarrier_CompareAndSwap(), because its implementation | |
| 230 // ensures that all stores happen through the kernel helper, | |
| 231 // which always implements a full barrier. | |
| 232 return NoBarrier_CompareAndSwap(ptr, old_value, new_value); | |
| 233 } | |
| 234 | |
| 235 #endif // __ARM_ARCH_6__ || __ARM_ARCH_7A__ | |
| 236 | |
| 237 // NOTE: Atomicity of the following load and store operations is only | |
| 238 // guaranteed when the |ptr| values are 32-bit aligned. | |
| 239 | |
| 240 inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { | |
| 241 *ptr = value; | |
| 242 } | |
| 243 | |
| 244 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { | |
| 245 *ptr = value; | |
| 246 MemoryBarrier(); | |
| 247 } | |
| 248 | |
| 249 inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { | |
| 250 MemoryBarrier(); | |
| 251 *ptr = value; | |
| 252 } | |
| 253 | |
| 254 inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; } | |
| 255 | |
| 256 inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { | |
| 257 Atomic32 value = *ptr; | |
| 258 MemoryBarrier(); | |
| 259 return value; | |
| 260 } | |
| 261 | |
| 262 inline Atomic32 Release_Load(volatile const Atomic32* ptr) { | |
| 263 MemoryBarrier(); | |
| 264 return *ptr; | |
| 265 } | |
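
A hedged sketch of the usual acquire/release pairing built on the loads and stores above (g_message and g_message_ready are invented names): a writer publishes data with Release_Store() and a reader observes it with Acquire_Load().

// Invented names for illustration; not part of the reviewed file.
base::subtle::Atomic32 g_message = 0;
base::subtle::Atomic32 g_message_ready = 0;

void Publish(base::subtle::Atomic32 value) {
  base::subtle::NoBarrier_Store(&g_message, value);
  base::subtle::Release_Store(&g_message_ready, 1);  // Data visible first.
}

bool TryConsume(base::subtle::Atomic32* out) {
  if (base::subtle::Acquire_Load(&g_message_ready) == 0)
    return false;  // Nothing published yet.
  *out = base::subtle::NoBarrier_Load(&g_message);  // Sees the published data.
  return true;
}
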
| 266 | |
| 267 #undef LINUX_ARM_KERNEL_MEMORY_BARRIER | |
| 268 | |
| 269 } // namespace base::subtle | |
| 270 } // namespace base | |
| 271 | |
| 272 #endif // BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_ | |