Implement store as an explicit atomic operation.

Using explicit atomic operations permits TSan to understand them and
prevents false positives.

Downgrading the atomic Load to acquire semantics. This reduces the
number of memory barriers inserted from two down to one at most.

Also renaming Load/Store to AcquireLoad/ReleaseStore.

BUG=chromium:512382
R=dvyukov@chromium.org, glider@chromium.org
TBR=tommi@webrtc.org

Review URL: https://codereview.webrtc.org/1246073002

Cr-Commit-Position: refs/heads/master@{#9613}
diff --git a/webrtc/base/atomicops.h b/webrtc/base/atomicops.h
index 80b2124..a863566 100644
--- a/webrtc/base/atomicops.h
+++ b/webrtc/base/atomicops.h
@@ -31,10 +31,10 @@
   static int Decrement(volatile int* i) {
     return ::InterlockedDecrement(reinterpret_cast<volatile LONG*>(i));
   }
-  static int Load(volatile const int* i) {
+  static int AcquireLoad(volatile const int* i) {
     return *i;
   }
-  static void Store(volatile int* i, int value) {
+  static void ReleaseStore(volatile int* i, int value) {
     *i = value;
   }
   static int CompareAndSwap(volatile int* i, int old_value, int new_value) {
@@ -49,13 +49,11 @@
   static int Decrement(volatile int* i) {
     return __sync_sub_and_fetch(i, 1);
   }
-  static int Load(volatile const int* i) {
-    // Adding 0 is a no-op, so const_cast is fine.
-    return __sync_add_and_fetch(const_cast<volatile int*>(i), 0);
+  static int AcquireLoad(volatile const int* i) {
+    return __atomic_load_n(i, __ATOMIC_ACQUIRE);
   }
-  static void Store(volatile int* i, int value) {
-    __sync_synchronize();
-    *i = value;
+  static void ReleaseStore(volatile int* i, int value) {
+    __atomic_store_n(i, value, __ATOMIC_RELEASE);
   }
   static int CompareAndSwap(volatile int* i, int old_value, int new_value) {
     return __sync_val_compare_and_swap(i, old_value, new_value);
diff --git a/webrtc/base/refcount.h b/webrtc/base/refcount.h
index 79b0591..49f3777 100644
--- a/webrtc/base/refcount.h
+++ b/webrtc/base/refcount.h
@@ -96,11 +96,11 @@
   }
 
   virtual int AddRef() {
-    return rtc::AtomicOps::Increment(&ref_count_);
+    return AtomicOps::Increment(&ref_count_);
   }
 
   virtual int Release() {
-    int count = rtc::AtomicOps::Decrement(&ref_count_);
+    int count = AtomicOps::Decrement(&ref_count_);
     if (!count) {
       delete this;
     }
@@ -114,7 +114,7 @@
   // barrier needed for the owning thread to act on the object, knowing that it
   // has exclusive access to the object.
   virtual bool HasOneRef() const {
-    return rtc::AtomicOps::Load(&ref_count_) == 1;
+    return AtomicOps::AcquireLoad(&ref_count_) == 1;
   }
 
  protected:
diff --git a/webrtc/system_wrappers/source/trace_impl.cc b/webrtc/system_wrappers/source/trace_impl.cc
index e6032d5..25da5a0 100644
--- a/webrtc/system_wrappers/source/trace_impl.cc
+++ b/webrtc/system_wrappers/source/trace_impl.cc
@@ -542,12 +542,12 @@
 
 // static
 void Trace::set_level_filter(int filter) {
-  rtc::AtomicOps::Store(&level_filter_, filter);
+  rtc::AtomicOps::ReleaseStore(&level_filter_, filter);
 }
 
 // static
 int Trace::level_filter() {
-  return rtc::AtomicOps::Load(&level_filter_);
+  return rtc::AtomicOps::AcquireLoad(&level_filter_);
 }
 
 // static
diff --git a/webrtc/video/video_capture_input.cc b/webrtc/video/video_capture_input.cc
index 8aae44f..1fbadff 100644
--- a/webrtc/video/video_capture_input.cc
+++ b/webrtc/video/video_capture_input.cc
@@ -63,7 +63,7 @@
   module_process_thread_->DeRegisterModule(overuse_detector_.get());
 
   // Stop the thread.
-  rtc::AtomicOps::Increment(&stop_);
+  rtc::AtomicOps::ReleaseStore(&stop_, 1);
   capture_event_.Set();
 
   // Stop the camera input.
@@ -128,7 +128,7 @@
   static const int kThreadWaitTimeMs = 100;
   int64_t capture_time = -1;
   if (capture_event_.Wait(kThreadWaitTimeMs) == kEventSignaled) {
-    if (rtc::AtomicOps::Load(&stop_))
+    if (rtc::AtomicOps::AcquireLoad(&stop_))
       return false;
 
     int64_t encode_start_time = -1;