--- mozilla/toolkit/components/protobuf/src/google/protobuf/stubs/atomicops_internals_arm_gcc.h.orig	2017-10-31 10:44:14 UTC
+++ mozilla/toolkit/components/protobuf/src/google/protobuf/stubs/atomicops_internals_arm_gcc.h
@@ -35,10 +35,17 @@
 #ifndef GOOGLE_PROTOBUF_ATOMICOPS_INTERNALS_ARM_GCC_H_
 #define GOOGLE_PROTOBUF_ATOMICOPS_INTERNALS_ARM_GCC_H_
 
+#if defined(__FreeBSD__)
+#include <sys/types.h>
+#include <machine/atomic.h>
+#endif
+
 namespace google {
 namespace protobuf {
 namespace internal {
 
+inline void MemoryBarrier() {
+#if defined(__linux__) || defined(__ANDROID__)
 // 0xffff0fc0 is the hard coded address of a function provided by
 // the kernel which implements an atomic compare-exchange. On older
 // ARM architecture revisions (pre-v6) this may be implemented using
@@ -53,29 +60,160 @@ LinuxKernelCmpxchgFunc pLinuxKernelCmpxchg __attribute
 typedef void (*LinuxKernelMemoryBarrierFunc)(void);
 LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier __attribute__((weak)) =
     (LinuxKernelMemoryBarrierFunc) 0xffff0fa0;
+#elif defined(__FreeBSD__)
+  dsb();
+#else
+#error MemoryBarrier() is not implemented on this platform.
+#endif
+}
 
+// An ARM toolchain would only define one of these depending on which
+// variant of the target architecture is being used. This tests against
+// any known ARMv6, ARMv7 or ARMv8 variant, where it is possible to
+// directly use ldrex/strex instructions to implement fast atomic operations.
+#if defined(__ARM_ARCH_8A__) || \
+    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
+    defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
+    defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
+    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
+    defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \
+    defined(__ARM_ARCH_6KZ__)
 
 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                          Atomic32 old_value,
                                          Atomic32 new_value) {
-  Atomic32 prev_value = *ptr;
+  Atomic32 prev_value;
+  int reloop;
   do {
-    if (!pLinuxKernelCmpxchg(old_value, new_value,
-                             const_cast<Atomic32*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
+    // The following is equivalent to:
+    //
+    //   prev_value = LDREX(ptr)
+    //   reloop = 0
+    //   if (prev_value != old_value)
+    //     reloop = STREX(ptr, new_value)
+    __asm__ __volatile__("    ldrex %0, [%3]\n"
+                         "    mov %1, #0\n"
+                         "    cmp %0, %4\n"
+#ifdef __thumb2__
+                         "    it eq\n"
+#endif
+                         "    strexeq %1, %5, [%3]\n"
+                         : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
+                         : "r"(ptr), "r"(old_value), "r"(new_value)
+                         : "cc", "memory");
+  } while (reloop != 0);
   return prev_value;
 }
 
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  MemoryBarrier();
+  return result;
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  MemoryBarrier();
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
+                                          Atomic32 increment) {
+  Atomic32 value;
+  int reloop;
+  do {
+    // Equivalent to:
+    //
+    //   value = LDREX(ptr)
+    //   value += increment
+    //   reloop = STREX(ptr, value)
+    //
+    __asm__ __volatile__("    ldrex %0, [%3]\n"
+                         "    add %0, %0, %4\n"
+                         "    strex %1, %0, [%3]\n"
+                         : "=&r"(value), "=&r"(reloop), "+m"(*ptr)
+                         : "r"(ptr), "r"(increment)
+                         : "cc", "memory");
+  } while (reloop);
+  return value;
+}
+
+inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+                                        Atomic32 increment) {
+  // TODO(digit): Investigate if it's possible to implement this with
+  // a single MemoryBarrier() operation between the LDREX and STREX.
+  // See http://crbug.com/246514
+  MemoryBarrier();
+  Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
+  MemoryBarrier();
+  return result;
+}
+
 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                          Atomic32 new_value) {
   Atomic32 old_value;
+  int reloop;
   do {
+    // old_value = LDREX(ptr)
+    // reloop = STREX(ptr, new_value)
+    __asm__ __volatile__("    ldrex %0, [%3]\n"
+                         "    strex %1, %4, [%3]\n"
+                         : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
+                         : "r"(ptr), "r"(new_value)
+                         : "cc", "memory");
+  } while (reloop != 0);
+  return old_value;
+}
+
+// This tests against any known ARMv5 variant.
+#elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
+      defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
+
+// The kernel also provides a helper function to perform an atomic
+// compare-and-swap operation at the hard-wired address 0xffff0fc0.
+// On ARMv5, this is implemented by a special code path that the kernel
+// detects and treats specially when thread pre-emption happens.
+// On ARMv6 and higher, it uses LDREX/STREX instructions instead.
+//
+// Note that this always performs a full memory barrier, so there is no
+// need to add calls to MemoryBarrier() before or after it. It also
+// returns 0 on success, and 1 on failure.
+//
+// Available and reliable since Linux 2.6.24. Both Android and ChromeOS
+// use newer kernel revisions, so this should not be a concern.
+namespace {
+
+inline int LinuxKernelCmpxchg(Atomic32 old_value,
+                              Atomic32 new_value,
+                              volatile Atomic32* ptr) {
+  typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*);
+  return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
+}
+
+}  // namespace
+
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value) {
+  Atomic32 prev_value;
+  for (;;) {
+    prev_value = *ptr;
+    if (prev_value != old_value)
+      return prev_value;
+    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
+      return old_value;
+  }
+}
+
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
+                                         Atomic32 new_value) {
+  Atomic32 old_value;
+  do {
     old_value = *ptr;
-  } while (pLinuxKernelCmpxchg(old_value, new_value,
-                               const_cast<Atomic32*>(ptr)));
+  } while (LinuxKernelCmpxchg(old_value, new_value, ptr));
   return old_value;
 }
 
@@ -90,8 +228,7 @@ inline Atomic32 Barrier_AtomicIncrement(volatile Atomi
   // Atomic exchange the old value with an incremented one.
   Atomic32 old_value = *ptr;
   Atomic32 new_value = old_value + increment;
-  if (pLinuxKernelCmpxchg(old_value, new_value,
-                          const_cast<Atomic32*>(ptr)) == 0) {
+  if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
     // The exchange took place as expected.
     return new_value;
   }
@@ -102,23 +239,46 @@ inline Atomic32 Barrier_AtomicIncrement(volatile Atomi
 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                        Atomic32 old_value,
                                        Atomic32 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  Atomic32 prev_value;
+  for (;;) {
+    prev_value = *ptr;
+    if (prev_value != old_value) {
+      // Always ensure acquire semantics.
+      MemoryBarrier();
+      return prev_value;
+    }
+    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
+      return old_value;
+  }
 }
 
 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                        Atomic32 old_value,
                                        Atomic32 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  // This could be implemented as:
+  //    MemoryBarrier();
+  //    return NoBarrier_CompareAndSwap();
+  //
+  // But that would use 3 barriers per successful CAS. For performance,
+  // use Acquire_CompareAndSwap(). Its implementation guarantees that:
+  // - A successful swap uses only 2 barriers (in the kernel helper).
+  // - An early return due to (prev_value != old_value) performs
+  //   a memory barrier with no store, which is equivalent to the
+  //   generic implementation above.
+  return Acquire_CompareAndSwap(ptr, old_value, new_value);
 }
 
+#else
+# error "Your CPU's ARM architecture is not supported yet"
+#endif
+
+// NOTE: Atomicity of the following load and store operations is only
+// guaranteed in case of 32-bit alignment of |ptr| values.
+
 inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
   *ptr = value;
 }
 
-inline void MemoryBarrier() {
-  pLinuxKernelMemoryBarrier();
-}
-
 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
   *ptr = value;
   MemoryBarrier();
@@ -129,9 +289,7 @@ inline void Release_Store(volatile Atomic32* ptr, Atom
   *ptr = value;
 }
 
-inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
-  return *ptr;
-}
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }
 
 inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
   Atomic32 value = *ptr;
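
As a cross-check of the hand-written LDREX/STREX loops in the patch: the two relaxed primitives can also be expressed with the GCC/Clang __atomic builtins. The sketch below is illustrative only and is not part of the patch; the *_Builtin names are hypothetical, but the semantics should match the assembly versions.

#include <stdint.h>

typedef int32_t Atomic32;

// Same contract as NoBarrier_CompareAndSwap: returns the value observed
// at |ptr| just before the attempted swap.
inline Atomic32 NoBarrier_CompareAndSwap_Builtin(volatile Atomic32* ptr,
                                                 Atomic32 old_value,
                                                 Atomic32 new_value) {
  // On failure, the builtin writes the observed value back into
  // |old_value|, so returning it yields prev_value in both cases.
  __atomic_compare_exchange_n(ptr, &old_value, new_value,
                              false /* strong CAS */,
                              __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  return old_value;
}

// Same contract as NoBarrier_AtomicIncrement: returns the new value.
inline Atomic32 NoBarrier_AtomicIncrement_Builtin(volatile Atomic32* ptr,
                                                  Atomic32 increment) {
  return __atomic_add_fetch(ptr, increment, __ATOMIC_RELAXED);
}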
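
The FreeBSD branch of MemoryBarrier() leans on the dsb() macro that <machine/atomic.h> provides on ARM. Assuming an ARMv7 target, the same full data synchronization barrier could be written as a one-line inline-assembly helper; this standalone variant is an assumption for illustration, not what the patch actually compiles:

// Hypothetical ARMv7-only stand-in for the dsb() macro used above.
// The "memory" clobber also stops the compiler from reordering
// memory accesses across the barrier.
inline void MemoryBarrierDsb() {
  __asm__ __volatile__("dsb" : : : "memory");
}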
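
To see how the acquire/release pairing is meant to be used, here is a caller-side sketch of a test-and-set spinlock built from Acquire_CompareAndSwap() and the header's Release_Store() (visible in the last hunk's context). The SpinLock type and functions are hypothetical caller code, not part of protobuf or this patch.

// 0 = free, 1 = held.
struct SpinLock {
  volatile Atomic32 state;
};

inline void SpinLockAcquire(SpinLock* lock) {
  // Spin until the 0 -> 1 transition succeeds (the CAS returns the old
  // value, so 0 means we took the lock). The trailing barrier in
  // Acquire_CompareAndSwap keeps the critical section from being
  // reordered before the lock is observed as taken.
  while (Acquire_CompareAndSwap(&lock->state, 0, 1) != 0) {
  }
}

inline void SpinLockRelease(SpinLock* lock) {
  // Release_Store issues its barrier before the store, so the critical
  // section's writes are visible before the lock reads as free again.
  Release_Store(&lock->state, 0);
}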