Add a workaround to DoNotOptimize(...) for a Clang inline assembly bug.
diff --git a/include/benchmark/benchmark_api.h b/include/benchmark/benchmark_api.h
index a99ae9b..8e6a63a 100644
--- a/include/benchmark/benchmark_api.h
+++ b/include/benchmark/benchmark_api.h
@@ -190,10 +190,19 @@
 // expression from being optimized away by the compiler. This function is
 // intented to add little to no overhead.
 // See: http://stackoverflow.com/questions/28287064
-#if defined(__GNUC__)
+#if defined(__clang__) && defined(__GNUC__)
+// TODO(ericwf): Clang has a bug where it always tries to use a register,
+// even if the value must be stored in memory. This causes codegen to fail.
+// To work around this we drop the "r" constraint and use "+m" alone, so the
+// operand is always kept in memory.
 template <class Tp>
 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
-    asm volatile("" : "+r" (const_cast<Tp&>(value)));
+    asm volatile("" : "+m" (const_cast<Tp&>(value)));
+}
+#elif defined(__GNUC__)
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
+    asm volatile("" : "+rm" (const_cast<Tp&>(value)));
 }
 #else
 template <class Tp>