--- mozilla/toolkit/components/protobuf/src/google/protobuf/stubs/atomicops_internals_arm_gcc.h.orig	2017-10-31 10:44:14 UTC
+++ mozilla/toolkit/components/protobuf/src/google/protobuf/stubs/atomicops_internals_arm_gcc.h
@@ -35,10 +35,17 @@
 #ifndef GOOGLE_PROTOBUF_ATOMICOPS_INTERNALS_ARM_GCC_H_
 #define GOOGLE_PROTOBUF_ATOMICOPS_INTERNALS_ARM_GCC_H_
 
+#if defined(__FreeBSD__)
+#include <sys/types.h>
+#include <machine/atomic.h>
+#endif
+
 namespace google {
 namespace protobuf {
 namespace internal {
 
+inline void MemoryBarrier() {
+#if defined(__linux__) || defined(__ANDROID__)
 // 0xffff0fc0 is the hard coded address of a function provided by
 // the kernel which implements an atomic compare-exchange. On older
 // ARM architecture revisions (pre-v6) this may be implemented using
@@ -53,29 +60,160 @@ LinuxKernelCmpxchgFunc pLinuxKernelCmpxchg __attribute
 typedef void (*LinuxKernelMemoryBarrierFunc)(void);
 LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier __attribute__((weak)) =
     (LinuxKernelMemoryBarrierFunc) 0xffff0fa0;
+#elif defined(__FreeBSD__)
+  dsb();
+#else
+#error MemoryBarrier() is not implemented on this platform.
+#endif
+}
 
+// An ARM toolchain would only define one of these depending on which
+// variant of the target architecture is being used. This tests against
+// any known ARMv6, ARMv7 or ARMv8 variant, where it is possible to
+// directly use ldrex/strex instructions to implement fast atomic operations.
+#if defined(__ARM_ARCH_8A__) || \
+    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
+    defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
+    defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
+    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
+    defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \
+    defined(__ARM_ARCH_6KZ__)
 
 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                          Atomic32 old_value,
                                          Atomic32 new_value) {
-  Atomic32 prev_value = *ptr;
+  Atomic32 prev_value;
+  int reloop;
   do {
-    if (!pLinuxKernelCmpxchg(old_value, new_value,
-                             const_cast<Atomic32*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
+    // The following is equivalent to:
+    //
+    //   prev_value = LDREX(ptr)
+    //   reloop = 0
+    //   if (prev_value != old_value)
+    //     reloop = STREX(ptr, new_value)
+    __asm__ __volatile__("    ldrex %0, [%3]\n"
+                         "    mov %1, #0\n"
+                         "    cmp %0, %4\n"
+#ifdef __thumb2__
+                         "    it eq\n"
+#endif
+                         "    strexeq %1, %5, [%3]\n"
+                         : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
+                         : "r"(ptr), "r"(old_value), "r"(new_value)
+                         : "cc", "memory");
+  } while (reloop != 0);
   return prev_value;
 }
 
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  MemoryBarrier();
+  return result;
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  MemoryBarrier();
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
+                                          Atomic32 increment) {
+  Atomic32 value;
+  int reloop;
+  do {
+    // Equivalent to:
+    //
+    //   value = LDREX(ptr)
+    //   value += increment
+    //   reloop = STREX(ptr, value)
+    //
+    __asm__ __volatile__("    ldrex %0, [%3]\n"
+                         "    add %0, %0, %4\n"
+                         "    strex %1, %0, [%3]\n"
+                         : "=&r"(value), "=&r"(reloop), "+m"(*ptr)
+                         : "r"(ptr), "r"(increment)
+                         : "cc", "memory");
+  } while (reloop);
+  return value;
+}
+
+inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+                                        Atomic32 increment) {
+  // TODO(digit): Investigate if it's possible to implement this with
+  // a single MemoryBarrier() operation between the LDREX and STREX.
+  // See http://crbug.com/246514
+  MemoryBarrier();
+  Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
+  MemoryBarrier();
+  return result;
+}
+
 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                          Atomic32 new_value) {
   Atomic32 old_value;
+  int reloop;
   do {
+    // old_value = LDREX(ptr)
+    // reloop = STREX(ptr, new_value)
+    __asm__ __volatile__("    ldrex %0, [%3]\n"
+                         "    strex %1, %4, [%3]\n"
+                         : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
+                         : "r"(ptr), "r"(new_value)
+                         : "cc", "memory");
+  } while (reloop != 0);
+  return old_value;
+}
+
+// This tests against any known ARMv5 variant.
+#elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
+      defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
+
+// The kernel also provides a helper function to perform an atomic
+// compare-and-swap operation at the hard-wired address 0xffff0fc0.
+// On ARMv5, this is implemented by a special code path that the kernel
+// detects and treats specially when thread pre-emption happens.
+// On ARMv6 and higher, it uses LDREX/STREX instructions instead.
+//
+// Note that this always performs a full memory barrier, so there is no
+// need to add calls to MemoryBarrier() before or after it. It also
+// returns 0 on success, and 1 on failure.
+//
+// Available and reliable since Linux 2.6.24. Both Android and ChromeOS
+// use newer kernel revisions, so this should not be a concern.
+namespace {
+
+inline int LinuxKernelCmpxchg(Atomic32 old_value,
+                              Atomic32 new_value,
+                              volatile Atomic32* ptr) {
+  typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*);
+  return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
+}
+
+}  // namespace
+
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value) {
+  Atomic32 prev_value;
+  for (;;) {
+    prev_value = *ptr;
+    if (prev_value != old_value)
+      return prev_value;
+    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
+      return old_value;
+  }
+}
+
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
+                                         Atomic32 new_value) {
+  Atomic32 old_value;
+  do {
     old_value = *ptr;
-  } while (pLinuxKernelCmpxchg(old_value, new_value,
-                               const_cast<Atomic32*>(ptr)));
+  } while (LinuxKernelCmpxchg(old_value, new_value, ptr));
   return old_value;
 }
 
@@ -90,8 +228,7 @@ inline Atomic32 Barrier_AtomicIncrement(volatile Atomi
   // Atomic exchange the old value with an incremented one.
   Atomic32 old_value = *ptr;
   Atomic32 new_value = old_value + increment;
-  if (pLinuxKernelCmpxchg(old_value, new_value,
-                          const_cast<Atomic32*>(ptr)) == 0) {
+  if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
     // The exchange took place as expected.
     return new_value;
   }
@@ -102,23 +239,46 @@ inline Atomic32 Barrier_AtomicIncrement(volatile Atomi
 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                        Atomic32 old_value,
                                        Atomic32 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  Atomic32 prev_value;
+  for (;;) {
+    prev_value = *ptr;
+    if (prev_value != old_value) {
+      // Always ensure acquire semantics.
+      MemoryBarrier();
+      return prev_value;
+    }
+    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
+      return old_value;
+  }
 }
 
 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                        Atomic32 old_value,
                                        Atomic32 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  // This could be implemented as:
+  //    MemoryBarrier();
+  //    return NoBarrier_CompareAndSwap();
+  //
+  // But that would use 3 barriers per successful CAS. For performance,
+  // use Acquire_CompareAndSwap(). Its implementation guarantees that:
+  // - A successful swap uses only 2 barriers (in the kernel helper).
+  // - An early return due to (prev_value != old_value) performs
+  //   a memory barrier with no store, which is equivalent to the
+  //   generic implementation above.
+  return Acquire_CompareAndSwap(ptr, old_value, new_value);
 }
 
+#else
+# error "Your CPU's ARM architecture is not supported yet"
+#endif
+
+// NOTE: Atomicity of the following load and store operations is only
+// guaranteed in case of 32-bit alignment of |ptr| values.
+
 inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
   *ptr = value;
 }
 
-inline void MemoryBarrier() {
-  pLinuxKernelMemoryBarrier();
-}
-
 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
   *ptr = value;
   MemoryBarrier();
@@ -129,9 +289,7 @@ inline void Release_Store(volatile Atomic32* ptr, Atom
   *ptr = value;
 }
 
-inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
-  return *ptr;
-}
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }
 
 inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
   Atomic32 value = *ptr;
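
As a cross-check of the hand-written LDREX/STREX loops in the patch: the two relaxed primitives can also be expressed with the GCC/Clang __atomic builtins. The sketch below is illustrative only and is not part of the patch; the *_Builtin names are hypothetical, but the semantics should match the assembly versions.

#include <stdint.h>

typedef int32_t Atomic32;

// Same contract as NoBarrier_CompareAndSwap: returns the value observed
// at |ptr| just before the attempted swap.
inline Atomic32 NoBarrier_CompareAndSwap_Builtin(volatile Atomic32* ptr,
                                                 Atomic32 old_value,
                                                 Atomic32 new_value) {
  // On failure, the builtin writes the observed value back into
  // |old_value|, so returning it yields prev_value in both cases.
  __atomic_compare_exchange_n(ptr, &old_value, new_value,
                              false /* strong CAS */,
                              __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  return old_value;
}

// Same contract as NoBarrier_AtomicIncrement: returns the new value.
inline Atomic32 NoBarrier_AtomicIncrement_Builtin(volatile Atomic32* ptr,
                                                  Atomic32 increment) {
  return __atomic_add_fetch(ptr, increment, __ATOMIC_RELAXED);
}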
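
The FreeBSD branch of MemoryBarrier() leans on the dsb() macro that <machine/atomic.h> provides on ARM. Assuming an ARMv7 target, the same full data synchronization barrier could be written as a one-line inline-assembly helper; this standalone variant is an assumption for illustration, not what the patch actually compiles:

// Hypothetical ARMv7-only stand-in for the dsb() macro used above.
// The "memory" clobber also stops the compiler from reordering
// memory accesses across the barrier.
inline void MemoryBarrierDsb() {
  __asm__ __volatile__("dsb" : : : "memory");
}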
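
To see how the acquire/release pairing is meant to be used, here is a caller-side sketch of a test-and-set spinlock built from Acquire_CompareAndSwap() and the header's Release_Store() (visible in the last hunk's context). The SpinLock type and functions are hypothetical caller code, not part of protobuf or this patch.

// 0 = free, 1 = held.
struct SpinLock {
  volatile Atomic32 state;
};

inline void SpinLockAcquire(SpinLock* lock) {
  // Spin until the 0 -> 1 transition succeeds (the CAS returns the old
  // value, so 0 means we took the lock). The trailing barrier in
  // Acquire_CompareAndSwap keeps the critical section from being
  // reordered before the lock is observed as taken.
  while (Acquire_CompareAndSwap(&lock->state, 0, 1) != 0) {
  }
}

inline void SpinLockRelease(SpinLock* lock) {
  // Release_Store issues its barrier before the store, so the critical
  // section's writes are visible before the lock reads as free again.
  Release_Store(&lock->state, 0);
}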