// Copyright 2010 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation; use atomicops.h instead.
//
// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.

#ifndef V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
#define V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_

#if defined(__QNXNTO__)
#include <sys/cpuinline.h>
#endif

namespace v8 {
namespace base {

// Memory barriers on ARM are funky, but the kernel is here to help:
//
// * ARMv5 didn't support SMP; there is no memory barrier instruction at
//   all on this architecture, or when targeting its machine code.
//
// * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by
//   writing a random value to a very specific coprocessor register.
//
// * On ARMv7, the "dmb" instruction is used to perform a full memory
//   barrier (though writing to the co-processor will still work).
//   However, on single core devices (e.g. Nexus One, or Nexus S),
//   this instruction will take up to 200 ns, which is huge, even though
//   it's completely un-needed on these devices.
//
// * There is no easy way to determine at runtime if the device is
//   single or multi-core. However, the kernel provides a useful helper
//   function at a fixed memory address (0xffff0fa0), which will always
//   perform a memory barrier in the most efficient way. I.e. on single
//   core devices, this is an empty function that exits immediately.
//   On multi-core devices, it implements a full memory barrier.
//
// * This source could be compiled to ARMv5 machine code that runs on a
//   multi-core ARMv6 or ARMv7 device. In this case, memory barriers
//   are needed for correct execution. Always call the kernel helper, even
//   when targeting ARMv5TE.
//

inline void MemoryBarrier() {
#if defined(__ANDROID__)
  // Note: This is a function call, which is also an implicit compiler barrier.
  typedef void (*KernelMemoryBarrierFunc)();
  ((KernelMemoryBarrierFunc)0xffff0fa0)();
#elif defined(__QNXNTO__)
  __cpu_membarrier();
#else
  // Fallback to GCC built-in function
  __sync_synchronize();
#endif
}
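
// For reference only: a sketch (not used by this file) of the raw barrier
// sequences the Android kernel helper above ultimately relies on. On ARMv6
// the full barrier is a CP15 write, on ARMv7 and newer it is the "dmb"
// instruction. The exact choice is made by the kernel at runtime, which is
// why the code above calls the helper instead of emitting these directly.
//
//   // ARMv6: Data Memory Barrier via a write to the c7/c10/5 CP15 register.
//   __asm__ __volatile__("mcr p15, 0, %0, c7, c10, 5" : : "r"(0) : "memory");
//   // ARMv7+: dedicated barrier instruction, inner-shareable domain.
//   __asm__ __volatile__("dmb ish" : : : "memory");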

// An ARM toolchain would only define one of these depending on which
// variant of the target architecture is being used. This tests against
// any known ARMv6, ARMv7 or ARMv8 variant, where it is possible to
// directly use ldrex/strex instructions to implement fast atomic
// operations.
#if defined(__ARM_ARCH_8A__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) ||  \
    defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
    defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) ||  \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__)

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  int reloop;
  do {
    // The following is equivalent to:
    //
    //   prev_value = LDREX(ptr)
    //   reloop = 0
    //   if (prev_value != old_value)
    //      reloop = STREX(ptr, new_value)
    __asm__ __volatile__("    ldrex %0, [%3]\n"
                         "    mov %1, #0\n"
                         "    cmp %0, %4\n"
#ifdef __thumb2__
                         "    it eq\n"
#endif
                         "    strexeq %1, %5, [%3]\n"
                         : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(old_value), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return prev_value;
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  MemoryBarrier();
  return result;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  MemoryBarrier();
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 value;
  int reloop;
  do {
    // Equivalent to:
    //
    //  value = LDREX(ptr)
    //  value += increment
    //  reloop = STREX(ptr, value)
    //
    __asm__ __volatile__("    ldrex %0, [%3]\n"
                         "    add %0, %0, %4\n"
                         "    strex %1, %0, [%3]\n"
                         : "=&r"(value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(increment)
                         : "cc", "memory");
  } while (reloop);
  return value;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  // TODO(digit): Investigate if it's possible to implement this with
  // a single MemoryBarrier() operation between the LDREX and STREX.
  // See http://crbug.com/246514
  MemoryBarrier();
  Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
  MemoryBarrier();
  return result;
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  int reloop;
  do {
    // old_value = LDREX(ptr)
    // reloop = STREX(ptr, new_value)
    __asm__ __volatile__("   ldrex %0, [%3]\n"
                         "   strex %1, %4, [%3]\n"
                         : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return old_value;
}

// This tests against any known ARMv5 variant.
#elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
      defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)

// The kernel also provides a helper function to perform an atomic
// compare-and-swap operation at the hard-wired address 0xffff0fc0.
// On ARMv5, this is implemented by a special code path that the kernel
// detects and treats specially when thread pre-emption happens.
// On ARMv6 and higher, it uses LDREX/STREX instructions instead.
//
// Note that this always performs a full memory barrier; there is no
// need to add calls to MemoryBarrier() before or after it. It also
// returns 0 on success, and non-zero on failure.
//
// Available and reliable since Linux 2.6.24. Both Android and ChromeOS
// use newer kernel revisions, so this should not be a concern.
namespace {

inline int LinuxKernelCmpxchg(Atomic32 old_value,
                              Atomic32 new_value,
                              volatile Atomic32* ptr) {
  typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*);
  return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
}

}  // namespace

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value)
      return prev_value;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  do {
    old_value = *ptr;
  } while (LinuxKernelCmpxchg(old_value, new_value, ptr));
  return old_value;
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  for (;;) {
    // Atomic exchange the old value with an incremented one.
    Atomic32 old_value = *ptr;
    Atomic32 new_value = old_value + increment;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
      // The exchange took place as expected.
      return new_value;
    }
    // Otherwise, *ptr changed mid-loop and we need to retry.
  }
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value) {
      // Always ensure acquire semantics.
      MemoryBarrier();
      return prev_value;
    }
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  // This could be implemented as:
  //    MemoryBarrier();
  //    return NoBarrier_CompareAndSwap();
  //
  // But would use 3 barriers per successful CAS. To save performance,
  // use Acquire_CompareAndSwap(). Its implementation guarantees that:
  // - A successful swap uses only 2 barriers (in the kernel helper).
  // - An early return due to (prev_value != old_value) performs
  //   a memory barrier with no store, which is equivalent to the
  //   generic implementation above.
  return Acquire_CompareAndSwap(ptr, old_value, new_value);
}

#else
#  error "Your CPU's ARM architecture is not supported yet"
#endif
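
// Illustrative only: a minimal sketch (not part of the atomic ops interface)
// of how the primitives above are commonly combined into a thread-safe
// reference count. The Example_* names are hypothetical. The increment can be
// relaxed, while the decrement uses Barrier_AtomicIncrement() so that the
// thread dropping the last reference observes all prior writes to the
// protected object before tearing it down.
inline void Example_RefCountIncrement(volatile Atomic32* count) {
  NoBarrier_AtomicIncrement(count, 1);
}

// Returns true when the caller released the last reference.
inline bool Example_RefCountDecrement(volatile Atomic32* count) {
  return Barrier_AtomicIncrement(count, -1) == 0;
}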

// NOTE: Atomicity of the following load and store operations is only
// guaranteed in case of 32-bit alignment of |ptr| values.

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  MemoryBarrier();
  *ptr = value;
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;
  MemoryBarrier();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
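
// Illustrative only: a hypothetical sketch of the publish/consume pattern the
// accessors above are meant for. A producer fills in non-atomic data, then
// uses Release_Store() to publish a flag; a consumer that observes the flag
// via Acquire_Load() is guaranteed to also observe the data written before
// the store. The Example_* names are not part of the interface.
inline void Example_Publish(int* data, volatile Atomic32* ready, int payload) {
  *data = payload;          // Plain write to the payload.
  Release_Store(ready, 1);  // Barrier, then publish the flag.
}

// Returns true and fills |*out| only once the payload has been published.
inline bool Example_TryConsume(const int* data, volatile const Atomic32* ready,
                               int* out) {
  if (Acquire_Load(ready) == 0) return false;  // Flag load, then barrier.
  *out = *data;  // Safe: the barrier ordered this read after the flag load.
  return true;
}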

// Byte accessors.

inline void NoBarrier_Store(volatile Atomic8* ptr, Atomic8 value) {
  *ptr = value;
}

inline Atomic8 NoBarrier_Load(volatile const Atomic8* ptr) { return *ptr; }

}  // namespace base
}  // namespace v8

#endif  // V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_