Initial commit
Benchmark library builds and runs but only single-threaded. Multithreaded
support needs a bit more love.
Currently requires some C++11 support (g++ 4.6.3 seems to work).
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5dfc638
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+CMakeCache.txt
+CMakeFiles/
+Makefile
+bin/
+cmake_install.cmake
+lib/
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..f363a47
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,43 @@
+cmake_minimum_required (VERSION 2.8)
+project (benchmark)
+
+find_package(Threads)
+
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib)
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib)
+
+set(CMAKE_CXX_FLAGS "-Wall -Werror --std=c++0x")
+set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -DDEBUG")
+set(CMAKE_CXX_FLAGS_RELEASE "-fno-strict-aliasing -O3 -DNDEBUG")
+
+# Set OS
+if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+ add_definitions(-DOS_MACOSX)
+endif()
+
+if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
+ add_definitions(-DOS_LINUX)
+endif()
+
+if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
+ add_definitions(-DOS_WINDOWS)
+endif()
+
+# Set CPU
+if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86")
+ add_definitions(-DARCH_X86)
+endif()
+
+# Set up directories
+include_directories(${PROJECT_SOURCE_DIR}/include)
+include_directories(${PROJECT_SOURCE_DIR}/src)
+link_directories(${PROJECT_SOURCE_DIR}/lib)
+
+# Build the targets
+FILE(GLOB SOURCE_FILES "src/*.cc")
+add_library(benchmark STATIC ${SOURCE_FILES})
+
+add_executable(benchmark_test test/benchmark_test.cc)
+target_link_libraries(benchmark_test benchmark ${CMAKE_THREAD_LIBS_INIT})
+
diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h
new file mode 100644
index 0000000..d5dec23
--- /dev/null
+++ b/include/benchmark/benchmark.h
@@ -0,0 +1,540 @@
+// Support for registering benchmarks for functions.
+
+/* Example usage:
+// Define a function that executes the code to be measured a
+// specified number of times:
+static void BM_StringCreation(benchmark::State& state) {
+ while (state.KeepRunning())
+ std::string empty_string;
+}
+
+// Register the function as a benchmark
+BENCHMARK(BM_StringCreation);
+
+// Define another benchmark
+static void BM_StringCopy(benchmark::State& state) {
+ std::string x = "hello";
+ while (state.KeepRunning())
+ std::string copy(x);
+}
+BENCHMARK(BM_StringCopy);
+
+// Augment the main() program to invoke benchmarks if specified
+// via the --benchmark_filter command line flag. E.g.,
+//       my_unittest --benchmark_filter=all
+//       my_unittest --benchmark_filter=BM_StringCreation
+//       my_unittest --benchmark_filter=String
+//       my_unittest --benchmark_filter='Copy|Creation'
+int main(int argc, const char* argv[]) {
+  benchmark::Initialize(&argc, argv);
+
+  benchmark::RunSpecifiedBenchmarks();
+}
+
+// Sometimes a family of microbenchmarks can be implemented with
+// just one routine that takes an extra argument to specify which
+// one of the family of benchmarks to run. For example, the following
+// code defines a family of microbenchmarks for measuring the speed
+// of memcpy() calls of different lengths:
+
+static void BM_memcpy(benchmark::State& state) {
+ char* src = new char[state.range_x()]; char* dst = new char[state.range_x()];
+ memset(src, 'x', state.range_x());
+  while (state.KeepRunning())
+    memcpy(dst, src, state.range_x());
+  state.SetBytesProcessed(int64_t(state.iterations()) * int64_t(state.range_x()));
+  delete[] src; delete[] dst;
+}
+BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
+
+// The preceding code is quite repetitive, and can be replaced with the
+// following short-hand. The following invocation will pick a few
+// appropriate arguments in the specified range and will generate a
+// microbenchmark for each such argument.
+BENCHMARK(BM_memcpy)->Range(8, 8<<10);
+
+// You might have a microbenchmark that depends on two inputs. For
+// example, the following code defines a family of microbenchmarks for
+// measuring the speed of set insertion.
+static void BM_SetInsert(benchmark::State& state) {
+ while (state.KeepRunning()) {
+ state.PauseTiming();
+ set<int> data = ConstructRandomSet(state.range_x());
+ state.ResumeTiming();
+    for (int j = 0; j < state.range_y(); ++j)
+ data.insert(RandomNumber());
+ }
+}
+BENCHMARK(BM_SetInsert)
+ ->ArgPair(1<<10, 1)
+ ->ArgPair(1<<10, 8)
+ ->ArgPair(1<<10, 64)
+ ->ArgPair(1<<10, 512)
+ ->ArgPair(8<<10, 1)
+ ->ArgPair(8<<10, 8)
+ ->ArgPair(8<<10, 64)
+ ->ArgPair(8<<10, 512);
+
+// The preceding code is quite repetitive, and can be replaced with
+// the following short-hand. The following macro will pick a few
+// appropriate arguments in the product of the two specified ranges
+// and will generate a microbenchmark for each such pair.
+BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512);
+
+// For more complex patterns of inputs, passing a custom function
+// to Apply allows programmatic specification of an
+// arbitrary set of arguments to run the microbenchmark on.
+// The following example enumerates a dense range on
+// one parameter, and a sparse range on the second.
+static benchmark::internal::Benchmark* CustomArguments(
+ benchmark::internal::Benchmark* b) {
+ for (int i = 0; i <= 10; ++i)
+ for (int j = 32; j <= 1024*1024; j *= 8)
+ b = b->ArgPair(i, j);
+ return b;
+}
+BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
+
+// Templated microbenchmarks work the same way:
+// Produce then consume 'size' messages 'iters' times
+// Measures throughput in the absence of multiprogramming.
+template <class Q> void BM_Sequential(benchmark::State& state) {
+ Q q;
+ typename Q::value_type v;
+ while (state.KeepRunning()) {
+ for (int i = state.range_x(); i--; )
+ q.push(v);
+ for (int e = state.range_x(); e--; )
+ q.Wait(&v);
+ }
+ // actually messages, not bytes:
+ state.SetBytesProcessed(
+ static_cast<int64_t>(state.iterations())*state.range_x());
+}
+BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
+
+In a multithreaded test, it is guaranteed that none of the threads will start
+until all have called KeepRunning, and all will have finished before KeepRunning
+returns false. As such, any global setup or teardown you want to do can be
+wrapped in a check against the thread index:
+
+static void BM_MultiThreaded(benchmark::State& state) {
+ if (state.thread_index == 0) {
+ // Setup code here.
+ }
+ while (state.KeepRunning()) {
+ // Run the test as normal.
+ }
+ if (state.thread_index == 0) {
+ // Teardown code here.
+ }
+}
+*/
+
+#ifndef BENCHMARK_BENCHMARK_H_
+#define BENCHMARK_BENCHMARK_H_
+
+#include <stdint.h>
+
+#include <functional>
+#include <string>
+#include <vector>
+
+#include "macros.h"
+
+namespace benchmark {
+// If the --benchmark_filter flag is empty, do nothing.
+//
+// Otherwise, run all benchmarks specified by the --benchmark_filter flag,
+// and exit after running the benchmarks.
+extern void RunSpecifiedBenchmarks();
+
+// ------------------------------------------------------
+// Routines that can be called from within a benchmark
+
+//
+// REQUIRES: a benchmark is currently executing
+extern void SetLabel(const std::string& label);
+
+// If this routine is called, peak memory allocation past this point in the
+// benchmark is reported at the end of the benchmark report line. (It is
+// computed by running the benchmark once with a single iteration and a memory
+// tracer.)
+extern void MemoryUsage();
+
+// If a particular benchmark is I/O bound, or if for some reason CPU
+// timings are not representative, call this method from within the
+// benchmark routine. If called, the elapsed time will be used to
+// control how many iterations are run, and in the printing of
+// items/second or MB/seconds values. If not called, the cpu time
+// used by the benchmark will be used.
+extern void UseRealTime();
+
+namespace internal {
+class Benchmark;
+}
+
+// State is passed to a running Benchmark and contains state for the
+// benchmark to use.
+class State {
+ public:
+ // Returns true iff the benchmark should continue through another iteration.
+ bool KeepRunning();
+
+ void PauseTiming();
+ void ResumeTiming();
+
+ // Set the number of bytes processed by the current benchmark
+ // execution. This routine is typically called once at the end of a
+ // throughput oriented benchmark. If this routine is called with a
+ // value > 0, the report is printed in MB/sec instead of nanoseconds
+ // per iteration.
+ //
+ // REQUIRES: a benchmark has exited its KeepRunning loop.
+ void SetBytesProcessed(int64_t bytes);
+
+ // If this routine is called with items > 0, then an items/s
+ // label is printed on the benchmark report line for the currently
+ // executing benchmark. It is typically called at the end of a processing
+ // benchmark where a processing items/second output is desired.
+ //
+ // REQUIRES: a benchmark has exited its KeepRunning loop.
+ void SetItemsProcessed(int64_t items);
+
+ // If this routine is called, the specified label is printed at the
+ // end of the benchmark report line for the currently executing
+ // benchmark. Example:
+ // static void BM_Compress(int iters) {
+ // ...
+  //   double compression = input_size / output_size;
+  //   benchmark::SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression));
+ // }
+ // Produces output that looks like:
+ // BM_Compress 50 50 14115038 compress:27.3%
+ //
+ // REQUIRES: a benchmark has exited its KeepRunning loop.
+ void SetLabel(const std::string& label);
+
+ // Range arguments for this run. CHECKs if the argument has been set.
+ int range_x() const;
+ int range_y() const;
+
+ int iterations() const { return total_iterations_; }
+
+ const int thread_index;
+
+ private:
+ class FastClock;
+ struct SharedState;
+
+ State(FastClock* clock, SharedState* s, int t);
+ bool StartRunning();
+ bool FinishInterval();
+ bool MaybeStop();
+ void NewInterval();
+ bool AllStarting();
+ bool RunAnotherInterval() const;
+
+ void Run();
+
+ enum EState {
+ STATE_INITIAL, // KeepRunning hasn't been called
+ STATE_STARTING, // KeepRunning called, waiting for other threads
+ STATE_RUNNING, // Running and being timed
+ STATE_STOPPING, // Not being timed but waiting for other threads
+ STATE_STOPPED, // Stopped
+ } state_;
+
+ FastClock* clock_;
+
+ // State shared by all BenchmarkRun objects that belong to the same
+ // BenchmarkInstance
+ SharedState* shared_;
+
+ // Custom label set by the user.
+ std::string label_;
+
+ // Each State object goes through a sequence of measurement intervals. By
+ // default each interval is approx. 100ms in length. The following stats are
+ // kept for each interval.
+ int64_t iterations_;
+ double start_cpu_;
+ double start_time_;
+ int64_t stop_time_micros_;
+
+ double start_pause_;
+ double pause_time_;
+
+ // Total number of iterations for all finished runs.
+ int64_t total_iterations_;
+
+ // Approximate time in microseconds for one interval of execution.
+ // Dynamically adjusted as needed.
+ int64_t interval_micros_;
+
+ // True if the current interval is the continuation of a previous one.
+ bool is_continuation_;
+
+ friend class internal::Benchmark;
+ DISALLOW_COPY_AND_ASSIGN(State);
+};
+
+namespace internal {
+class BenchmarkReporter;
+
+typedef std::function<void(State&)> BenchmarkFunction;
+
+// Run all benchmarks whose name is a partial match for the regular
+// expression in "spec". The results of benchmark runs are fed to "reporter".
+void RunMatchingBenchmarks(const std::string& spec,
+ BenchmarkReporter* reporter);
+
+// Extract the list of benchmark names that match the specified regular
+// expression.
+void FindMatchingBenchmarkNames(const std::string& re,
+ std::vector<std::string>* benchmark_names);
+
+// ------------------------------------------------------
+// Benchmark registration object. The BENCHMARK() macro expands
+// into an internal::Benchmark* object. Various methods can
+// be called on this object to change the properties of the benchmark.
+// Each method returns "this" so that multiple method calls can
+// be chained into one expression.
+class Benchmark {
+ public:
+ // The Benchmark takes ownership of the Callback pointed to by f.
+ Benchmark(const char* name, BenchmarkFunction f);
+
+ ~Benchmark();
+
+ // Note: the following methods all return "this" so that multiple
+ // method calls can be chained together in one expression.
+
+ // Run this benchmark once with "x" as the extra argument passed
+ // to the function.
+ // REQUIRES: The function passed to the constructor must accept an arg1.
+ Benchmark* Arg(int x);
+
+ // Run this benchmark once for a number of values picked from the
+ // range [start..limit]. (start and limit are always picked.)
+ // REQUIRES: The function passed to the constructor must accept an arg1.
+ Benchmark* Range(int start, int limit);
+
+ // Run this benchmark once for every value in the range [start..limit]
+ // REQUIRES: The function passed to the constructor must accept an arg1.
+ Benchmark* DenseRange(int start, int limit);
+
+ // Run this benchmark once with "x,y" as the extra arguments passed
+ // to the function.
+ // REQUIRES: The function passed to the constructor must accept arg1,arg2.
+ Benchmark* ArgPair(int x, int y);
+
+ // Pick a set of values A from the range [lo1..hi1] and a set
+ // of values B from the range [lo2..hi2]. Run the benchmark for
+ // every pair of values in the cartesian product of A and B
+ // (i.e., for all combinations of the values in A and B).
+ // REQUIRES: The function passed to the constructor must accept arg1,arg2.
+ Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2);
+
+ // Pass this benchmark object to *func, which can customize
+ // the benchmark by calling various methods like Arg, ArgPair,
+ // Threads, etc.
+ Benchmark* Apply(void (*func)(Benchmark* benchmark));
+
+ // Support for running multiple copies of the same benchmark concurrently
+ // in multiple threads. This may be useful when measuring the scaling
+ // of some piece of code.
+
+ // Run one instance of this benchmark concurrently in t threads.
+ Benchmark* Threads(int t);
+
+ // Pick a set of values T from [min_threads,max_threads].
+ // min_threads and max_threads are always included in T. Run this
+ // benchmark once for each value in T. The benchmark run for a
+ // particular value t consists of t threads running the benchmark
+ // function concurrently. For example, consider:
+ // BENCHMARK(Foo)->ThreadRange(1,16);
+ // This will run the following benchmarks:
+ // Foo in 1 thread
+ // Foo in 2 threads
+ // Foo in 4 threads
+ // Foo in 8 threads
+ // Foo in 16 threads
+ Benchmark* ThreadRange(int min_threads, int max_threads);
+
+ // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
+ Benchmark* ThreadPerCpu();
+
+ // TODO(dominich): Control whether or not real-time is used for this benchmark
+ // TODO(dominich): Control the default number of iterations
+
+ // -------------------------------
+ // Following methods are not useful for clients
+
+ // Used inside the benchmark implementation
+ struct Instance;
+ struct ThreadStats;
+
+ // Extract the list of benchmark instances that match the specified
+ // regular expression.
+ static void FindBenchmarks(const std::string& re,
+ std::vector<Instance>* benchmarks);
+
+ // Measure the overhead of an empty benchmark to subtract later.
+ static void MeasureOverhead();
+
+ private:
+ std::vector<Benchmark::Instance> CreateBenchmarkInstances(int rangeXindex,
+ int rangeYindex);
+
+ std::string name_;
+ BenchmarkFunction function_;
+ int registration_index_;
+ std::vector<int> rangeX_;
+ std::vector<int> rangeY_;
+ std::vector<int> thread_counts_;
+
+ // Special value placed in thread_counts_ to stand for NumCPUs()
+ static const int kNumCpuMarker = -1;
+
+ // Special value used to indicate that no range is required.
+ static const int kNoRange = -1;
+
+ static void AddRange(std::vector<int>* dst, int lo, int hi, int mult);
+ static double MeasurePeakHeapMemory(const Instance& b);
+ static void RunInstance(const Instance& b, BenchmarkReporter* br);
+ friend class ::benchmark::State;
+ friend struct ::benchmark::internal::Benchmark::Instance;
+ friend void ::benchmark::internal::RunMatchingBenchmarks(
+ const std::string&, BenchmarkReporter*);
+ DISALLOW_COPY_AND_ASSIGN(Benchmark);
+};
+
+// ------------------------------------------------------
+// Benchmarks reporter interface + data containers.
+
+struct BenchmarkContextData {
+ int num_cpus;
+ double mhz_per_cpu;
+ //std::string cpu_info;
+ bool cpu_scaling_enabled;
+
+ // The number of chars in the longest benchmark name.
+ int name_field_width;
+};
+
+struct BenchmarkRunData {
+ BenchmarkRunData() :
+ thread_index(-1),
+ iterations(1),
+ real_accumulated_time(0),
+ cpu_accumulated_time(0),
+ bytes_per_second(0),
+ items_per_second(0),
+ max_heapbytes_used(0) {}
+
+ std::string benchmark_name;
+ std::string report_label;
+ int thread_index;
+ int64_t iterations;
+ double real_accumulated_time;
+ double cpu_accumulated_time;
+
+ // Zero if not set by benchmark.
+ double bytes_per_second;
+ double items_per_second;
+
+ // This is set to 0.0 if memory tracing is not enabled.
+ double max_heapbytes_used;
+};
+
+// Interface for custom benchmark result printers.
+// By default, benchmark reports are printed to stdout. However an application
+// can control the destination of the reports by calling
+// RunMatchingBenchmarks and passing it a custom reporter object.
+// The reporter object must implement the following interface.
+class BenchmarkReporter {
+ public:
+ // Called once for every suite of benchmarks run.
+ // The parameter "context" contains information that the
+ // reporter may wish to use when generating its report, for example the
+ // platform under which the benchmarks are running. The benchmark run is
+ // never started if this function returns false, allowing the reporter
+ // to skip runs based on the context information.
+ virtual bool ReportContext(const BenchmarkContextData& context) = 0;
+
+ // Called once for each group of benchmark runs, gives information about
+ // cpu-time and heap memory usage during the benchmark run.
+ // Note that all the grouped benchmark runs should refer to the same
+ // benchmark, thus have the same name.
+ virtual void ReportRuns(const std::vector<BenchmarkRunData>& report) = 0;
+
+ virtual ~BenchmarkReporter();
+};
+
+
+// ------------------------------------------------------
+// Internal implementation details follow; please ignore
+
+// Given a collection of reports, computes their mean and stddev.
+// REQUIRES: all runs in "reports" must be from the same benchmark.
+void ComputeStats(const std::vector<BenchmarkRunData>& reports,
+ BenchmarkRunData* mean_data,
+ BenchmarkRunData* stddev_data);
+
+// Simple reporter that outputs benchmark data to the console. This is the
+// default reporter used by RunSpecifiedBenchmarks().
+class ConsoleReporter : public BenchmarkReporter {
+ public:
+ virtual bool ReportContext(const BenchmarkContextData& context);
+ virtual void ReportRuns(const std::vector<BenchmarkRunData>& reports);
+ private:
+ std::string PrintMemoryUsage(double bytes);
+ virtual void PrintRunData(const BenchmarkRunData& report);
+ int name_field_width_;
+};
+
+} // end namespace internal
+
+void Initialize(int* argc, const char** argv);
+} // end namespace benchmark
+
+// ------------------------------------------------------
+// Macro to register benchmarks
+
+// Helpers for generating unique variable names
+#define BENCHMARK_CONCAT(a, b, c) BENCHMARK_CONCAT2(a, b, c)
+#define BENCHMARK_CONCAT2(a, b, c) a ## b ## c
+
+#define BENCHMARK(n) \
+ static ::benchmark::internal::Benchmark* \
+ BENCHMARK_CONCAT(__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
+ (new ::benchmark::internal::Benchmark(#n, n))
+
+// Old-style macros
+#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
+#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->ArgPair((a1), (a2))
+#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
+#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
+ BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2))
+
+// This will register a benchmark for a templatized function. For example:
+//
+// template<int arg>
+// void BM_Foo(benchmark::State& state);
+//
+// BENCHMARK_TEMPLATE(BM_Foo, 1);
+//
+// will register BM_Foo<1> as a benchmark.
+#define BENCHMARK_TEMPLATE(n, a) \
+ static ::benchmark::internal::Benchmark* \
+ BENCHMARK_CONCAT(__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
+ (new ::benchmark::internal::Benchmark(#n "<" #a ">", n<a>))
+
+#define BENCHMARK_TEMPLATE2(n, a, b) \
+ static ::benchmark::internal::Benchmark* \
+ BENCHMARK_CONCAT(__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
+ (new ::benchmark::internal::Benchmark(#n "<" #a "," #b ">", n<a, b>))
+
+#endif // BENCHMARK_BENCHMARK_H_
+
diff --git a/include/benchmark/macros.h b/include/benchmark/macros.h
new file mode 100644
index 0000000..8c2df94
--- /dev/null
+++ b/include/benchmark/macros.h
@@ -0,0 +1,120 @@
+#ifndef BENCHMARK_MACROS_H_
+#define BENCHMARK_MACROS_H_
+
+#include <assert.h>
+
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ TypeName(const TypeName&); \
+ void operator=(const TypeName&);
+
+// The arraysize(arr) macro returns the # of elements in an array arr.
+// The expression is a compile-time constant, and therefore can be
+// used in defining new arrays, for example. If you use arraysize on
+// a pointer by mistake, you will get a compile-time error.
+//
+// One caveat is that, for C++03, arraysize() doesn't accept any array of
+// an anonymous type or a type defined inside a function. In these rare
+// cases, you have to use the unsafe ARRAYSIZE() macro below. This is
+// due to a limitation in C++03's template system. The limitation has
+// been removed in C++11.
+
+// This template function declaration is used in defining arraysize.
+// Note that the function doesn't need an implementation, as we only
+// use its type.
+template <typename T, size_t N>
+char (&ArraySizeHelper(T (&array)[N]))[N];
+
+// That gcc wants both of these prototypes seems mysterious. VC, for
+// its part, can't decide which to use (another mystery). Matching of
+// template overloads: the final frontier.
+#ifndef COMPILER_MSVC
+template <typename T, size_t N>
+char (&ArraySizeHelper(const T (&array)[N]))[N];
+#endif
+
+#define arraysize(array) (sizeof(ArraySizeHelper(array)))
+
+// The STATIC_ASSERT macro can be used to verify that a compile time
+// expression is true. For example, you could use it to verify the
+// size of a static array:
+//
+// STATIC_ASSERT(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES,
+// content_type_names_incorrect_size);
+//
+// or to make sure a struct is smaller than a certain size:
+//
+// STATIC_ASSERT(sizeof(foo) < 128, foo_too_large);
+//
+// The second argument to the macro is the name of the variable. If
+// the expression is false, most compilers will issue a warning/error
+// containing the name of the variable.
+
+template <bool>
+struct StaticAssert {
+};
+
+#define STATIC_ASSERT(expr, msg) \
+ typedef StaticAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
+
+// Implementation details of STATIC_ASSERT:
+//
+// - STATIC_ASSERT works by defining an array type that has -1
+// elements (and thus is invalid) when the expression is false.
+//
+// - The simpler definition
+//
+// #define STATIC_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
+//
+// does not work, as gcc supports variable-length arrays whose sizes
+// are determined at run-time (this is gcc's extension and not part
+// of the C++ standard). As a result, gcc fails to reject the
+// following code with the simple definition:
+//
+// int foo;
+// STATIC_ASSERT(foo, msg); // not supposed to compile as foo is
+// // not a compile-time constant.
+//
+// - By using the type StaticAssert<(bool(expr))>, we ensures that
+// expr is a compile-time constant. (Template arguments must be
+// determined at compile-time.)
+//
+// - The outer parentheses in StaticAssert<(bool(expr))> are necessary
+// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
+//
+// StaticAssert<bool(expr)>
+//
+// instead, these compilers will refuse to compile
+//
+// STATIC_ASSERT(5 > 0, some_message);
+//
+// (They seem to think the ">" in "5 > 0" marks the end of the
+// template argument list.)
+//
+// - The array size is (bool(expr) ? 1 : -1), instead of simply
+//
+// ((expr) ? 1 : -1).
+//
+// This is to avoid running into a bug in MS VC 7.1, which
+// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
+
+#define CHECK(b) do { if (!(b)) assert(false); } while(0)
+#define CHECK_EQ(a, b) CHECK((a) == (b))
+#define CHECK_GE(a, b) CHECK((a) >= (b))
+#define CHECK_LE(a, b) CHECK((a) <= (b))
+#define CHECK_GT(a, b) CHECK((a) > (b))
+#define CHECK_LT(a, b) CHECK((a) < (b))
+
+//
+// Prevent the compiler from complaining about or optimizing away variables
+// that appear unused.
+#define ATTRIBUTE_UNUSED __attribute__ ((unused))
+
+//
+// For functions we want to force inline or not inline.
+// Introduced in gcc 3.1.
+#define ATTRIBUTE_ALWAYS_INLINE __attribute__ ((always_inline))
+#define HAVE_ATTRIBUTE_ALWAYS_INLINE 1
+#define ATTRIBUTE_NOINLINE __attribute__ ((noinline))
+#define HAVE_ATTRIBUTE_NOINLINE 1
+
+#endif // BENCHMARK_MACROS_H_
diff --git a/src/benchmark.cc b/src/benchmark.cc
new file mode 100644
index 0000000..7a4de8e
--- /dev/null
+++ b/src/benchmark.cc
@@ -0,0 +1,1197 @@
+#include "benchmark/benchmark.h"
+#include "benchmark/macros.h"
+#include "colorprint.h"
+#include "commandlineflags.h"
+#include "mutex_lock.h"
+#include "sleep.h"
+#include "stat.h"
+#include "sysinfo.h"
+#include "walltime.h"
+
+#include <pthread.h>
+#include <semaphore.h>
+#include <string.h>
+
+#if defined OS_FREEBSD
+#include <gnuregex.h>
+#else
+#include <regex.h>
+#endif
+
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <sstream>
+
+DEFINE_string(benchmark_filter, ".",
+ "A regular expression that specifies the set of benchmarks "
+ "to execute. If this flag is empty, no benchmarks are run. "
+ "If this flag is the string \"all\", all benchmarks linked "
+ "into the process are run.");
+
+DEFINE_int32(benchmark_min_iters, 100,
+ "Minimum number of iterations per benchmark");
+
+DEFINE_int32(benchmark_max_iters, 1000000000,
+ "Maximum number of iterations per benchmark");
+
+DEFINE_double(benchmark_min_time, 0.5,
+ "Minimum number of seconds we should run benchmark before "
+ "results are considered significant. For cpu-time based "
+ "tests, this is the lower bound on the total cpu time "
+ "used by all threads that make up the test. For real-time "
+ "based tests, this is the lower bound on the elapsed time "
+ "of the benchmark execution, regardless of number of "
+ "threads.");
+
+DEFINE_bool(benchmark_memory_usage, false,
+ "Report memory usage for all benchmarks");
+
+DEFINE_int32(benchmark_repetitions, 1,
+ "The number of runs of each benchmark. If greater than 1, the "
+ "mean and standard deviation of the runs will be reported.");
+
+DEFINE_int32(v, 0, "The level of verbose logging to output");
+DEFINE_bool(color_print, true, "Enables colorized logging.");
+
+// Will be non-empty if heap checking is turned on, which would
+// invalidate any benchmarks.
+DECLARE_string(heap_check);
+
+// The ""'s catch people who don't pass in a literal for "str"
+#define strliterallen(str) (sizeof("" str "")-1)
+
+// Must use a string literal for prefix.
+#define memprefix(str, len, prefix) \
+ ( (((len) >= strliterallen(prefix)) \
+ && memcmp(str, prefix, strliterallen(prefix)) == 0) \
+ ? str + strliterallen(prefix) \
+ : NULL )
+
+namespace benchmark {
+
+namespace {
+
+// kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta.
+static const char kBigSIUnits[] = "kMGTPEZY";
+// Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi.
+static const char kBigIECUnits[] = "KMGTPEZY";
+// milli, micro, nano, pico, femto, atto, zepto, yocto.
+static const char kSmallSIUnits[] = "munpfazy";
+
+// We require that all three arrays have the same size.
+STATIC_ASSERT(arraysize(kBigSIUnits) == arraysize(kBigIECUnits),
+ SI_and_IEC_unit_arrays_must_be_the_same_size);
+STATIC_ASSERT(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits),
+ Small_SI_and_Big_SI_unit_arrays_must_be_the_same_size);
+static const int kUnitsSize = arraysize(kBigSIUnits);
+
+void ToExponentAndMantissa(double val, double thresh,
+ int precision, double one_k,
+ std::string* mantissa, int* exponent) {
+ std::stringstream mantissa_stream;
+
+ if (val < 0) {
+ mantissa_stream << "-";
+ val = -val;
+ }
+
+ // Adjust threshold so that it never excludes things which can't be rendered
+ // in 'precision' digits.
+ const double adjusted_threshold =
+ std::max(thresh, 1.0 / pow(10.0, precision));
+ const double big_threshold = adjusted_threshold * one_k;
+ const double small_threshold = adjusted_threshold;
+
+ if (val > big_threshold) {
+ // Positive powers
+ double scaled = val;
+ for (size_t i = 0; i < arraysize(kBigSIUnits); ++i) {
+ scaled /= one_k;
+ if (scaled <= big_threshold) {
+ mantissa_stream << scaled;
+ *exponent = i + 1;
+ *mantissa = mantissa_stream.str();
+ return;
+ }
+ }
+ mantissa_stream << val;
+ *exponent = 0;
+ } else if (val < small_threshold) {
+ // Negative powers
+ double scaled = val;
+ for (size_t i = 0; i < arraysize(kSmallSIUnits); ++i) {
+ scaled *= one_k;
+ if (scaled >= small_threshold) {
+ mantissa_stream << scaled;
+ *exponent = -i - 1;
+ *mantissa = mantissa_stream.str();
+ return;
+ }
+ }
+ mantissa_stream << val;
+ *exponent = 0;
+ } else {
+ mantissa_stream << val;
+ *exponent = 0;
+ }
+ *mantissa = mantissa_stream.str();
+}
+
+std::string ExponentToPrefix(int exponent, bool iec) {
+ if (exponent == 0)
+ return "";
+
+ const int index = (exponent > 0 ? exponent - 1 : -exponent - 1);
+ if (index >= kUnitsSize)
+ return "";
+
+ const char *array = (exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) :
+ kSmallSIUnits);
+ if (iec)
+ return array[index] + std::string("i");
+ else
+ return std::string(1, array[index]);
+}
+
+std::string ToBinaryStringFullySpecified(double value, double threshold,
+ int precision) {
+ std::string mantissa;
+ int exponent;
+ ToExponentAndMantissa(value, threshold, precision, 1024., &mantissa,
+ &exponent);
+ return mantissa + ExponentToPrefix(exponent, false);
+}
+
+inline void AppendHumanReadable(int n, std::string* str) {
+ std::stringstream ss;
+ // Round down to the nearest SI prefix.
+ ss << "/" << ToBinaryStringFullySpecified(n, 1.0, 0);
+ *str += ss.str();
+}
+
+inline std::string HumanReadableNumber(double n) {
+ // 1.1 means that figures up to 1.1k should be shown with the next unit down;
+ // this softens edge effects.
+ // 1 means that we should show one decimal place of precision.
+ return ToBinaryStringFullySpecified(n, 1.1, 1);
+}
+
+} // end namespace
+
+namespace internal {
+struct Benchmark::ThreadStats {
+ int64_t bytes_processed;
+ int64_t items_processed;
+
+ ThreadStats() { Reset(); }
+
+ void Reset() {
+ bytes_processed = 0;
+ items_processed = 0;
+ }
+
+ void Add(const ThreadStats& other) {
+ bytes_processed += other.bytes_processed;
+ items_processed += other.items_processed;
+ }
+};
+
+} // end namespace internal
+
+namespace {
+
+// Per-thread stats
+pthread_key_t thread_stats_key;
+internal::Benchmark::ThreadStats* thread_stats = nullptr;
+
+// For non-dense Range, intermediate values are powers of kRangeMultiplier.
+static const int kRangeMultiplier = 8;
+
+// List of all registered benchmarks. Note that each registered
+// benchmark identifies a family of related benchmarks to run.
+static pthread_mutex_t benchmark_mutex;
+static std::vector<internal::Benchmark*>* families = NULL;
+
+bool running_benchmark = false;
+
+// Should this benchmark report memory usage?
+bool get_memory_usage;
+
+// Should this benchmark base decisions off of real time rather than
+// cpu time?
+bool use_real_time;
+
+// Overhead of an empty benchmark.
+double overhead = 0.0;
+
+// pthread TLS destructor: reclaims a thread's ThreadStats when the
+// thread exits (registered in Initialize via pthread_key_create).
+void DeleteThreadStats(void* p) {
+  delete static_cast<internal::Benchmark::ThreadStats*>(p);
+}
+
+// Return prefix to print in front of each reported line, so that
+// debug-build numbers cannot be mistaken for release-build results.
+const char* Prefix() {
+#ifdef NDEBUG
+  return "";
+#else
+  return "DEBUG: ";
+#endif
+}
+
+// TODO
+//static internal::MallocCounter *benchmark_mc;
+
+// Best-effort detection of CPU frequency scaling via Linux CPUfreq.
+// Returns true if any CPU reports a governor other than "performance".
+static bool CpuScalingEnabled() {
+  // On Linux, the CPUfreq subsystem exposes CPU information as files on the
+  // local file system. If reading the exported files fails, then we may not
+  // be running on Linux, so we silently ignore all the read errors.
+  const int num_cpus = NumCPUs();
+  for (int cpu = 0; cpu < num_cpus; ++cpu) {
+    std::stringstream path;
+    path << "/sys/devices/system/cpu/cpu" << cpu << "/cpufreq/scaling_governor";
+    FILE* file = fopen(path.str().c_str(), "r");
+    if (!file)
+      break;
+    char governor[16];
+    const size_t nread = fread(governor, 1, sizeof(governor), file);
+    fclose(file);
+    // Any governor other than "performance" may rescale the clock under us.
+    if (memprefix(governor, nread, "performance") == NULL)
+      return true;
+  }
+  return false;
+}
+
+} // namespace
+
+namespace internal {
+
+// Out-of-line definition anchors BenchmarkReporter's vtable in this file.
+BenchmarkReporter::~BenchmarkReporter() {}
+
+// Aggregate a family's runs into synthetic "_mean" and "_stddev"
+// entries. All entries in "reports" must come from the same benchmark
+// (checked below).
+void ComputeStats(const std::vector<BenchmarkRunData>& reports,
+                  BenchmarkRunData* mean_data,
+                  BenchmarkRunData* stddev_data) {
+  // Accumulators.
+  Stat1_d real_accumulated_time_stat;
+  Stat1_d cpu_accumulated_time_stat;
+  Stat1_d bytes_per_second_stat;
+  Stat1_d items_per_second_stat;
+  Stat1MinMax_d max_heapbytes_used_stat;
+  int total_iters = 0;
+
+  // Populate the accumulators. Times are normalized to per-iteration
+  // values and weighted by each run's iteration count.
+  for (std::vector<BenchmarkRunData>::const_iterator it = reports.begin();
+       it != reports.end(); ++it) {
+    CHECK_EQ(reports[0].benchmark_name, it->benchmark_name);
+    total_iters += it->iterations;
+    real_accumulated_time_stat +=
+        Stat1_d(it->real_accumulated_time/it->iterations, it->iterations);
+    cpu_accumulated_time_stat +=
+        Stat1_d(it->cpu_accumulated_time/it->iterations, it->iterations);
+    items_per_second_stat += Stat1_d(it->items_per_second, it->iterations);
+    bytes_per_second_stat += Stat1_d(it->bytes_per_second, it->iterations);
+    max_heapbytes_used_stat += Stat1MinMax_d(it->max_heapbytes_used,
+                                             it->iterations);
+  }
+
+  // Get the data from the accumulator to BenchmarkRunData's.
+  mean_data->benchmark_name = reports[0].benchmark_name + "_mean";
+  mean_data->iterations = total_iters;
+  // Sum of the weighted per-iteration means == total time over all runs.
+  mean_data->real_accumulated_time = real_accumulated_time_stat.Sum();
+  mean_data->cpu_accumulated_time = cpu_accumulated_time_stat.Sum();
+  mean_data->bytes_per_second = bytes_per_second_stat.Mean();
+  mean_data->items_per_second = items_per_second_stat.Mean();
+  mean_data->max_heapbytes_used = max_heapbytes_used_stat.Max();
+
+  // Only add label to mean/stddev if it is same for all runs
+  mean_data->report_label = reports[0].report_label;
+  for (size_t i = 1; i < reports.size(); i++) {
+    if (reports[i].report_label != reports[0].report_label) {
+      mean_data->report_label = "";
+      break;
+    }
+  }
+
+  stddev_data->benchmark_name = reports[0].benchmark_name + "_stddev";
+  stddev_data->report_label = mean_data->report_label;
+  stddev_data->iterations = total_iters;
+  // We multiply by total_iters since PrintRunData expects a total time.
+  stddev_data->real_accumulated_time =
+      real_accumulated_time_stat.StdDev() * total_iters;
+  stddev_data->cpu_accumulated_time =
+      cpu_accumulated_time_stat.StdDev() * total_iters;
+  stddev_data->bytes_per_second = bytes_per_second_stat.StdDev();
+  stddev_data->items_per_second = items_per_second_stat.StdDev();
+  stddev_data->max_heapbytes_used = max_heapbytes_used_stat.StdDev();
+}
+
+// Format the peak-memory column, or return "" when memory reporting is
+// disabled or no valid (non-negative) measurement is available.
+std::string ConsoleReporter::PrintMemoryUsage(double bytes) {
+  if (!get_memory_usage || bytes < 0.0)
+    return "";
+
+  std::stringstream out;
+  out << " " << HumanReadableNumber(bytes) << "B peak-mem";
+  return out.str();
+}
+
+// Print the run-wide header (CPU count/speed, timestamp, scaling
+// warning, column labels). Returns true so the runs proceed.
+bool ConsoleReporter::ReportContext(const BenchmarkContextData& context) {
+  name_field_width_ = context.name_field_width;
+
+  std::cout << "Benchmarking on " << context.num_cpus << " X "
+            << context.mhz_per_cpu << " MHz CPU"
+            << ((context.num_cpus > 1) ? "s" : "") << "\n";
+
+  int remainder_ms;
+  char time_buf[32];
+  std::cout << walltime::Print(walltime::Now(), "%Y/%m/%d-%H:%M:%S",
+                               true,  // use local timezone
+                               time_buf, &remainder_ms) << "\n";
+
+  // Show details of CPU model, caches, TLBs etc.
+//  if (!context.cpu_info.empty())
+//    std::cout << "CPU: " << context.cpu_info.c_str();
+
+  // Warn on stderr so it is visible even when stdout is redirected.
+  if (context.cpu_scaling_enabled) {
+    std::cerr << "CPU scaling is enabled: Benchmark timings may be noisy.\n";
+  }
+
+  // fprintf returns the number of characters written; reuse it to size
+  // the separator rule. NOTE(review): a negative return (I/O error)
+  // would make the std::string constructor throw -- confirm acceptable.
+  int output_width = fprintf(stdout, "%s%-*s %10s %10s %10s\n",
+                             Prefix(), name_field_width_, "Benchmark",
+                             "Time(ns)", "CPU(ns)", "Iterations");
+  std::cout << std::string(output_width - 1, '-').c_str() << "\n";
+
+  return true;
+}
+
+// Print each run of one benchmark family, followed by aggregate
+// mean/stddev rows when the family was run more than once.
+void ConsoleReporter::ReportRuns(const std::vector<BenchmarkRunData>& reports) {
+  for (const BenchmarkRunData& run : reports) {
+    // Every entry must belong to the same benchmark family.
+    CHECK_EQ(reports[0].benchmark_name, run.benchmark_name);
+    PrintRunData(run);
+  }
+
+  // We don't report aggregated data if there was a single run.
+  if (reports.size() < 2)
+    return;
+
+  BenchmarkRunData mean_data;
+  BenchmarkRunData stddev_data;
+  internal::ComputeStats(reports, &mean_data, &stddev_data);
+
+  // Output using PrintRun.
+  PrintRunData(mean_data);
+  PrintRunData(stddev_data);
+  fprintf(stdout, "\n");
+}
+
+// Print one result row: name, per-iteration real/CPU time in ns,
+// iteration count, then optional throughput, label and memory columns.
+void ConsoleReporter::PrintRunData(const BenchmarkRunData& result) {
+  // Format bytes per second
+  std::string rate;
+  if (result.bytes_per_second > 0) {
+    std::stringstream ss;
+    ss << " " << HumanReadableNumber(result.bytes_per_second) << "B/s";
+    rate = ss.str();
+  }
+
+  // Format items per second
+  std::string items;
+  if (result.items_per_second > 0) {
+    std::stringstream ss;
+    ss << " " << HumanReadableNumber(result.items_per_second) << " items/s";
+    items = ss.str();
+  }
+
+  ColorPrintf(COLOR_DEFAULT, "%s", Prefix());
+  ColorPrintf(COLOR_GREEN, "%-*s ",
+              name_field_width_, result.benchmark_name.c_str());
+  // Accumulated times are in seconds; report them per iteration in ns.
+  ColorPrintf(COLOR_YELLOW, "%10.0f %10.0f ",
+              (result.real_accumulated_time * 1e9) /
+                  (static_cast<double>(result.iterations)),
+              (result.cpu_accumulated_time * 1e9) /
+                  (static_cast<double>(result.iterations)));
+  ColorPrintf(COLOR_CYAN, "%10lld", result.iterations);
+  ColorPrintf(COLOR_DEFAULT, "%*s %s %s%s\n", 16, rate.c_str(), items.c_str(),
+              result.report_label.c_str(),
+              PrintMemoryUsage(result.max_heapbytes_used).c_str());
+}
+
+// Request peak-memory reporting for subsequent benchmark runs.
+// (MallocCounter-based per-run measurement is still TODO.)
+void MemoryUsage() {
+  //if (benchmark_mc) {
+  //  benchmark_mc->Reset();
+  //} else {
+  get_memory_usage = true;
+  //}
+}
+
+// Base timing decisions on wall-clock time instead of CPU time for the
+// benchmark currently being run (consumed by FastClock::InitType).
+void UseRealTime() {
+  use_real_time = true;
+}
+
+// Print the supported command-line flags and terminate the process.
+void PrintUsageAndExit() {
+  fprintf(stdout, "benchmark [--benchmark_filter=<regex>]\n"
+                  "          [--benchmark_min_iters=<min_iters>]\n"
+                  "          [--benchmark_max_iters=<max_iters>]\n"
+                  "          [--benchmark_min_time=<min_time>]\n"
+//                  "          [--benchmark_memory_usage]\n"
+                  "          [--benchmark_repetitions=<num_repetitions>]\n"
+                  "          [--color_print={true|false}]\n"
+                  "          [--v=<verbosity>]\n");
+  exit(0);
+}
+
+// Strip the flags we understand out of (argc, argv), leaving unknown
+// arguments in place for the caller. "--help" prints usage and exits.
+void ParseCommandLineFlags(int* argc, const char** argv) {
+  for (int i = 1; i < *argc; ++i) {
+    if (ParseStringFlag(argv[i], "benchmark_filter",
+                        &FLAGS_benchmark_filter) ||
+        ParseInt32Flag(argv[i], "benchmark_min_iters",
+                       &FLAGS_benchmark_min_iters) ||
+        ParseInt32Flag(argv[i], "benchmark_max_iters",
+                       &FLAGS_benchmark_max_iters) ||
+        ParseDoubleFlag(argv[i], "benchmark_min_time",
+                        &FLAGS_benchmark_min_time) ||
+        // TODO(dominic)
+//        ParseBoolFlag(argv[i], "gbenchmark_memory_usage",
+//                      &FLAGS_gbenchmark_memory_usage) ||
+        ParseInt32Flag(argv[i], "benchmark_repetitions",
+                       &FLAGS_benchmark_repetitions) ||
+        ParseBoolFlag(argv[i], "color_print", &FLAGS_color_print) ||
+        ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
+      // Flag recognized: shift the remaining arguments down one slot.
+      // argv[*argc] is the NULL terminator the C standard guarantees, so
+      // reading argv[j + 1] at j == *argc - 1 stays in bounds.
+      for (int j = i; j != *argc; ++j)
+        argv[j] = argv[j + 1];
+
+      --(*argc);
+      --i;  // Re-examine the argument shifted into slot i.
+    } else if (IsFlag(argv[i], "help"))
+      PrintUsageAndExit();
+  }
+}
+
+} // end namespace internal
+
+// A clock that provides a fast mechanism to check if we're nearly done.
+// A background thread refreshes a cached "now" once per millisecond so
+// the benchmark loop's fast path is a plain comparison.
+class State::FastClock {
+ public:
+  enum Type { REAL_TIME, CPU_TIME };
+  explicit FastClock(Type type)
+      : type_(type), approx_time_(NowMicros()) {
+    // bg_done_ starts at 0; posting it in the destructor tells the
+    // background thread to exit.
+    sem_init(&bg_done_, 0, 0);
+    pthread_create(&bg_, NULL, &BGThreadWrapper, this);
+  }
+
+  ~FastClock() {
+    sem_post(&bg_done_);
+    pthread_join(bg_, NULL);
+    sem_destroy(&bg_done_);
+  }
+
+  // Returns true if the current time is guaranteed to be past "when_micros".
+  // This method is very fast.
+  // NOTE(review): approx_time_ is written by the background thread and
+  // read here without any atomic/barrier (the commented-out code below
+  // is the barrier version) -- this is a data race; confirm it is an
+  // accepted trade-off for this platform.
+  inline bool HasReached(int64_t when_micros) {
+    return approx_time_ >= when_micros;
+    // NOTE: this is the same as we're dealing with an int64_t
+    //return (base::subtle::NoBarrier_Load(&approx_time_) >= when_micros);
+  }
+
+  // Returns the current time in microseconds past the epoch.
+  int64_t NowMicros() const {
+    double t = 0;
+    switch (type_) {
+      case REAL_TIME:
+        t = walltime::Now();
+        break;
+      case CPU_TIME:
+        t = MyCPUUsage() + ChildrenCPUUsage();
+        break;
+    }
+    return static_cast<int64_t>(t * 1e6);
+  }
+
+  // Reinitialize if necessary (since clock type may be change once benchmark
+  // function starts running - see UseRealTime).
+  void InitType(Type type) {
+    type_ = type;
+    approx_time_ = NowMicros();
+    // NOTE: This is the same barring a memory barrier
+    // base::subtle::Release_Store(&approx_time_, NowMicros());
+  }
+
+ private:
+  Type type_;
+  int64_t approx_time_;  // Last time measurement taken by bg_
+  pthread_t bg_;  // Background thread that updates last_time_ once every ms
+
+  sem_t bg_done_;
+
+  // pthread entry point: trampoline into the member function.
+  static void* BGThreadWrapper(void* that) {
+    ((FastClock*)that)->BGThread();
+    return NULL;
+  }
+
+  // Refresh approx_time_ every millisecond until bg_done_ is posted.
+  // sem_getvalue is used as a non-blocking "should I exit?" probe.
+  void BGThread() {
+    int done = 0;
+    do {
+      SleepForMicroseconds(1000);
+      approx_time_ = NowMicros();
+      // NOTE: same code but no memory barrier. think on it.
+      //base::subtle::Release_Store(&approx_time_, NowMicros());
+      sem_getvalue(&bg_done_, &done);
+    } while (done == 0);
+  }
+
+  DISALLOW_COPY_AND_ASSIGN(FastClock);
+};
+
+namespace internal {
+
+// Storage for the constant declared in the header; kNumCpuMarker is a
+// sentinel thread count meaning "one thread per CPU" (see ThreadPerCpu).
+const int Benchmark::kNumCpuMarker;
+
+// Information kept per benchmark we may want to run
+struct Benchmark::Instance {
+  Instance()
+      : rangeXset(false), rangeX(kNoRange),
+        rangeYset(false), rangeY(kNoRange) {}
+
+  std::string name;    // Family name plus encoded arguments/threads
+  Benchmark* bm;       // Owning family (not owned by the instance)
+  bool rangeXset;
+  int rangeX;
+  bool rangeYset;
+  int rangeY;
+  int threads;  // Number of concurrent threads to use
+
+  // True when the family registered explicit thread counts.
+  bool multithreaded() const { return !bm->thread_counts_.empty(); }
+};
+
+} // end namespace internal
+
+// State shared by all threads running one benchmark instance; "mu"
+// guards every mutable member after construction.
+struct State::SharedState {
+  const internal::Benchmark::Instance* instance;
+  pthread_mutex_t mu;
+  int starting;  // Number of threads that have entered STARTING state
+  int stopping;  // Number of threads that have entered STOPPING state
+  int threads;   // Number of total threads that are running concurrently
+  internal::Benchmark::ThreadStats stats;
+  std::vector<internal::BenchmarkRunData> runs;  // accumulated runs
+  std::string label;
+
+  SharedState(const internal::Benchmark::Instance* b, int t)
+      : instance(b), starting(0), stopping(0), threads(t) {
+    // BUGFIX: "mu" was previously left uninitialized; locking an
+    // uninitialized pthread_mutex_t is undefined behavior.
+    pthread_mutex_init(&mu, nullptr);
+  }
+
+  ~SharedState() {
+    pthread_mutex_destroy(&mu);
+  }
+
+  DISALLOW_COPY_AND_ASSIGN(SharedState);
+};
+
+namespace internal {
+
+// Registers this family in the global "families" list and remembers its
+// slot so ~Benchmark can unregister it.
+// NOTE(review): this runs from static initializers (the BENCHMARK macro)
+// before benchmark::Initialize() calls pthread_mutex_init on
+// benchmark_mutex -- confirm the mutex is usable at this point.
+Benchmark::Benchmark(const char* name, BenchmarkFunction f)
+    : name_(name), function_(f) {
+  mutex_lock l(&benchmark_mutex);
+  if (families == nullptr)
+    families = new std::vector<Benchmark*>;
+  registration_index_ = families->size();
+  families->push_back(this);
+}
+
+// Unregisters this family. Its slot in "families" is NULLed rather than
+// erased so other families' registration_index_ values stay valid.
+Benchmark::~Benchmark() {
+  mutex_lock l(&benchmark_mutex);
+  CHECK((*families)[registration_index_] == this);
+  (*families)[registration_index_] = NULL;
+  // Shrink the vector if convenient.
+  while (!families->empty() && families->back() == NULL)
+    families->pop_back();
+}
+
+// Register a single argument value for this benchmark.
+Benchmark* Benchmark::Arg(int x) {
+  mutex_lock guard(&benchmark_mutex);
+  rangeX_.push_back(x);
+  return this;
+}
+
+// Register arguments from start to limit, stepping by powers of
+// kRangeMultiplier (both endpoints always included).
+Benchmark* Benchmark::Range(int start, int limit) {
+  std::vector<int> args;
+  AddRange(&args, start, limit, kRangeMultiplier);
+
+  mutex_lock guard(&benchmark_mutex);
+  for (std::vector<int>::const_iterator it = args.begin();
+       it != args.end(); ++it)
+    rangeX_.push_back(*it);
+  return this;
+}
+
+// Register one argument for every integer in [start, limit].
+Benchmark* Benchmark::DenseRange(int start, int limit) {
+  CHECK_GE(start, 0);
+  CHECK_LE(start, limit);
+  mutex_lock guard(&benchmark_mutex);
+  for (int value = start; value <= limit; ++value)
+    rangeX_.push_back(value);
+  return this;
+}
+
+// Register a single (x, y) argument pair for this benchmark.
+Benchmark* Benchmark::ArgPair(int x, int y) {
+  mutex_lock guard(&benchmark_mutex);
+  rangeX_.push_back(x);
+  rangeY_.push_back(y);
+  return this;
+}
+
+// Register the cross product of two generated ranges.
+// NOTE(review): unlike Arg/Range, this *replaces* any previously
+// registered arguments rather than appending -- confirm that is intended.
+Benchmark* Benchmark::RangePair(int lo1, int hi1, int lo2, int hi2) {
+  std::vector<int> first, second;
+  AddRange(&first, lo1, hi1, kRangeMultiplier);
+  AddRange(&second, lo2, hi2, kRangeMultiplier);
+
+  mutex_lock guard(&benchmark_mutex);
+  rangeX_.assign(first.begin(), first.end());
+  rangeY_.assign(second.begin(), second.end());
+  return this;
+}
+
+// Let a caller-supplied function register arbitrary argument
+// combinations on this benchmark.
+Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) {
+  custom_arguments(this);
+  return this;
+}
+
+// Run this benchmark with exactly "t" concurrent threads.
+Benchmark* Benchmark::Threads(int t) {
+  CHECK_GT(t, 0);
+  mutex_lock guard(&benchmark_mutex);
+  thread_counts_.push_back(t);
+  return this;
+}
+
+// Register thread counts from min_threads to max_threads, stepping by
+// powers of two (both endpoints always included).
+Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
+  CHECK_GT(min_threads, 0);
+  CHECK_GE(max_threads, min_threads);
+
+  mutex_lock guard(&benchmark_mutex);
+  AddRange(&thread_counts_, min_threads, max_threads, 2);
+  return this;
+}
+
+// Run with one thread per CPU; the kNumCpuMarker sentinel is resolved
+// to the actual CPU count in CreateBenchmarkInstances.
+Benchmark* Benchmark::ThreadPerCpu() {
+  mutex_lock guard(&benchmark_mutex);
+  thread_counts_.push_back(kNumCpuMarker);
+  return this;
+}
+
+// Fill *dst with lo, the powers of "mult" strictly between lo and hi,
+// and hi. The loop bound guards against int32 overflow of i * mult.
+void Benchmark::AddRange(std::vector<int>* dst, int lo, int hi, int mult) {
+  CHECK_GE(lo, 0);
+  CHECK_GE(hi, lo);
+
+  // Add "lo"
+  dst->push_back(lo);
+
+  // Now space out the benchmarks in multiples of "mult"
+  for (int32_t i = 1; i < std::numeric_limits<int32_t>::max()/mult; i *= mult) {
+    if (i >= hi) break;
+    if (i > lo)
+      dst->push_back(i);
+  }
+  // Add "hi" (if different from "lo")
+  if (hi != lo)
+    dst->push_back(hi);
+}
+
+// Expand one (rangeXindex, rangeYindex) argument selection into one
+// Instance per registered thread count (or a single single-threaded
+// instance if no thread counts were registered). Indices of kNoRange
+// mean "this benchmark takes no argument on that axis".
+std::vector<Benchmark::Instance> Benchmark::CreateBenchmarkInstances(
+    int rangeXindex, int rangeYindex) {
+  // Special list of thread counts to use when none are specified
+  std::vector<int> one_thread;
+  one_thread.push_back(1);
+
+  std::vector<Benchmark::Instance> instances;
+
+  const bool is_multithreaded = (!thread_counts_.empty());
+  const std::vector<int>* thread_counts =
+      (is_multithreaded ? &thread_counts_ : &one_thread);
+  for (size_t t = 0; t < thread_counts->size(); ++t) {
+    int num_threads = (*thread_counts)[t];
+    // Resolve the ThreadPerCpu sentinel to the real CPU count.
+    if (num_threads == kNumCpuMarker)
+      num_threads = NumCPUs();
+
+    Instance instance;
+    instance.name = name_;
+    instance.bm = this;
+    instance.threads = num_threads;
+
+    // Encode the selected arguments into the instance name (e.g. "/64K").
+    if (rangeXindex != kNoRange) {
+      instance.rangeX = rangeX_[rangeXindex];
+      instance.rangeXset = true;
+      AppendHumanReadable(instance.rangeX, &instance.name);
+    }
+    if (rangeYindex != kNoRange) {
+      instance.rangeY = rangeY_[rangeYindex];
+      instance.rangeYset = true;
+      AppendHumanReadable(instance.rangeY, &instance.name);
+    }
+
+    // Add the number of threads used to the name
+    if (is_multithreaded) {
+      std::stringstream ss;
+      ss << "/threads:" << instance.threads;
+      instance.name += ss.str();
+    }
+
+    instances.push_back(instance);
+  }
+
+  return instances;
+}
+
+// Extract the list of benchmark instances that match the specified
+// regular expression.
+void Benchmark::FindBenchmarks(const std::string& spec,
+                               std::vector<Instance>* benchmarks) {
+  // Make regular expression out of command-line flag
+  regex_t re;
+  int ec = regcomp(&re, spec.c_str(), REG_EXTENDED | REG_NOSUB);
+  if (ec != 0) {
+    size_t needed = regerror(ec, &re, NULL, 0);
+    char* errbuf = new char[needed];
+    regerror(ec, &re, errbuf, needed);
+    std::cerr << "Could not compile benchmark re: " << errbuf << "\n";
+    delete[] errbuf;
+    return;
+  }
+
+  mutex_lock l(&benchmark_mutex);
+  for (Benchmark* family : *families) {
+    if (family == nullptr) continue;  // Family was deleted
+
+    // Match against filter.
+    if (regexec(&re, family->name_.c_str(), 0, NULL, 0) != 0) {
+#ifdef DEBUG
+      std::cout << "Skipping " << family->name_ << "\n";
+#endif
+      continue;
+    }
+
+    // Expand the family's X/Y argument lists into concrete instances.
+    std::vector<Benchmark::Instance> instances;
+    if (family->rangeX_.empty() && family->rangeY_.empty()) {
+      instances = family->CreateBenchmarkInstances(kNoRange, kNoRange);
+      benchmarks->insert(benchmarks->end(), instances.begin(), instances.end());
+    } else if (family->rangeY_.empty()) {
+      for (size_t x = 0; x < family->rangeX_.size(); ++x) {
+        instances = family->CreateBenchmarkInstances(x, kNoRange);
+        benchmarks->insert(benchmarks->end(),
+                           instances.begin(), instances.end());
+      }
+    } else {
+      for (size_t x = 0; x < family->rangeX_.size(); ++x) {
+        for (size_t y = 0; y < family->rangeY_.size(); ++y) {
+          instances = family->CreateBenchmarkInstances(x, y);
+          benchmarks->insert(benchmarks->end(),
+                             instances.begin(), instances.end());
+        }
+      }
+    }
+  }
+
+  // BUGFIX: the compiled regex was previously leaked.
+  regfree(&re);
+}
+
+// Time an empty benchmark loop once at startup so the per-iteration
+// cost of KeepRunning itself can later be subtracted (stored in the
+// file-level "overhead" global).
+void Benchmark::MeasureOverhead() {
+  State::FastClock clock(State::FastClock::CPU_TIME);
+  State::SharedState state(NULL, 1);
+  State runner(&clock, &state, 0);
+  while (runner.KeepRunning()) {}
+  overhead = state.runs[0].real_accumulated_time /
+             static_cast<double>(state.runs[0].iterations);
+#ifdef DEBUG
+  std::cout << "Per-iteration overhead for doing nothing: " << overhead << "\n";
+#endif
+}
+
+// Run one benchmark instance (all of its threads), fill in the
+// resulting BenchmarkRunData entries, and hand them to the reporter.
+void Benchmark::RunInstance(const Instance& b, BenchmarkReporter* br) {
+  use_real_time = false;
+  running_benchmark = true;
+  // get_memory_usage = FLAGS_gbenchmark_memory_usage;
+  State::FastClock clock(State::FastClock::CPU_TIME);
+
+  // Initialize the test runners.
+  State::SharedState state(&b, b.threads);
+  {
+    // BUGFIX: this used to be a variable-length array of
+    // std::unique_ptr, which is a non-standard compiler extension; use a
+    // std::vector instead.
+    std::vector<std::unique_ptr<State> > runners;
+    runners.reserve(b.threads);
+    for (int i = 0; i < b.threads; ++i)
+      runners.push_back(std::unique_ptr<State>(new State(&clock, &state, i)));
+
+    // Run them all.
+    // NOTE(review): release() hands ownership to the (not yet
+    // implemented) thread machinery; in the single-threaded path the
+    // State object is currently leaked -- confirm intended.
+    for (int i = 0; i < b.threads; ++i) {
+      State* r = runners[i].release();
+      if (b.multithreaded()) {
+        // TODO: start pthreads (member of state?) and set up thread local
+        // pointers to stats
+        //pool->Add(base::NewCallback(r, &State::Run));
+      } else {
+        pthread_setspecific(thread_stats_key, thread_stats);
+        r->Run();
+      }
+    }
+    if (b.multithreaded()) {
+      // TODO: join all the threads
+      //pool->JoinAll();
+    }
+  }
+/*
+  double mem_usage = 0;
+  if (get_memory_usage) {
+    // Measure memory usage
+    Notification mem_done;
+    BenchmarkRun mem_run;
+    BenchmarkRun::SharedState mem_shared(&b, 1);
+    mem_run.Init(&clock, &mem_shared, 0);
+    {
+      testing::MallocCounter mc(testing::MallocCounter::THIS_THREAD_ONLY);
+      benchmark_mc = &mc;
+      mem_run.Run(&mem_done);
+      mem_done.WaitForNotification();
+      benchmark_mc = NULL;
+      mem_usage = mc.PeakHeapGrowth();
+    }
+  }
+*/
+  running_benchmark = false;
+
+  for (internal::BenchmarkRunData& report : state.runs) {
+    double seconds = (use_real_time ? report.real_accumulated_time :
+                                      report.cpu_accumulated_time);
+    // TODO: add the thread index here?
+    report.benchmark_name = b.name;
+    report.report_label = state.label;
+    // Guard against a zero-length interval producing inf/nan rates.
+    if (seconds > 0.0) {
+      report.bytes_per_second = state.stats.bytes_processed / seconds;
+      report.items_per_second = state.stats.items_processed / seconds;
+    } else {
+      report.bytes_per_second = 0.0;
+      report.items_per_second = 0.0;
+    }
+    report.max_heapbytes_used = MeasurePeakHeapMemory(b);
+  }
+
+  br->ReportRuns(state.runs);
+}
+
+// Run the specified benchmark, measure its peak memory usage, and
+// return the peak memory usage.
+// Currently a stub: always returns 0.0 unless memory reporting is off,
+// in which case it also returns 0.0 (MallocCounter port is TODO).
+double Benchmark::MeasurePeakHeapMemory(const Instance& b) {
+  if (!get_memory_usage)
+    return 0.0;
+  double bytes = 0.0;
+  /* TODO(dominich)
+  // Should we do multi-threaded runs?
+  const int num_threads = 1;
+  const int num_iters = 1;
+  {
+//    internal::MallocCounter mc(internal::MallocCounter::THIS_THREAD_ONLY);
+    running_benchmark = true;
+    timer_manager = new TimerManager(1, NULL);
+//    benchmark_mc = &mc;
+    timer_manager->StartTimer();
+
+    b.Run(num_iters);
+
+    running_benchmark = false;
+    delete timer_manager;
+    timer_manager = NULL;
+//    benchmark_mc = NULL;
+//    bytes = mc.PeakHeapGrowth();
+  }
+  */
+  return bytes;
+}
+
+} // end namespace internal
+
+// Per-thread benchmark driver. "t" is this thread's index; the interval
+// length is chosen so that benchmark_repetitions intervals together take
+// roughly benchmark_min_time seconds.
+State::State(FastClock* clock, SharedState* s, int t)
+    : thread_index(t),
+      state_(STATE_INITIAL),
+      clock_(clock),
+      shared_(s),
+      iterations_(0),
+      start_cpu_(0.0),
+      start_time_(0.0),
+      stop_time_micros_(0.0),
+      start_pause_(0.0),
+      pause_time_(0.0),
+      total_iterations_(0),
+      interval_micros_(
+          static_cast<int64_t>(1e6 * FLAGS_benchmark_min_time /
+                               FLAGS_benchmark_repetitions)) {
+  // BUGFIX: is_continuation_ was never initialized; give it a defined
+  // value in case it is read before StartRunning() assigns it.
+  is_continuation_ = false;
+}
+
+// Called once per benchmark-loop iteration; returns false when the
+// benchmark should stop. The fast path is a single cached-time compare.
+bool State::KeepRunning() {
+  // Fast path.
+  // BUGFIX: pause_time_ is accumulated in seconds (walltime::Now deltas)
+  // while stop_time_micros_ is in microseconds; convert before mixing
+  // the two so PauseTiming/ResumeTiming actually extend the interval.
+  if (!clock_->HasReached(stop_time_micros_ +
+                          static_cast<int64_t>(pause_time_ * 1e6))) {
+    ++iterations_;
+    return true;
+  }
+
+  // Slow path: the interval expired; advance the state machine.
+  switch(state_) {
+    case STATE_INITIAL: return StartRunning();
+    case STATE_STARTING: CHECK(false); return true;
+    case STATE_RUNNING: return FinishInterval();
+    case STATE_STOPPING: return MaybeStop();
+    case STATE_STOPPED: CHECK(false); return true;
+  }
+  CHECK(false);
+  return false;
+}
+
+// Mark the start of a non-timed region; paired with ResumeTiming.
+void State::PauseTiming() {
+  start_pause_ = walltime::Now();
+}
+
+// Accumulate the wall-clock time spent since PauseTiming into
+// pause_time_ (seconds); FinishInterval subtracts it from the total.
+void State::ResumeTiming() {
+  pause_time_ += walltime::Now() - start_pause_;
+}
+
+// Record total bytes processed by this thread; only legal after the
+// benchmark loop has finished (STATE_STOPPED). Stored in this thread's
+// TLS ThreadStats (the local shadows the file-level thread_stats).
+void State::SetBytesProcessed(int64_t bytes) {
+  CHECK_EQ(STATE_STOPPED, state_);
+  mutex_lock l(&shared_->mu);
+  internal::Benchmark::ThreadStats* thread_stats =
+      (internal::Benchmark::ThreadStats*) pthread_getspecific(thread_stats_key);
+  thread_stats->bytes_processed = bytes;
+}
+
+// Record total items processed by this thread; only legal after the
+// benchmark loop has finished (STATE_STOPPED).
+void State::SetItemsProcessed(int64_t items) {
+  CHECK_EQ(STATE_STOPPED, state_);
+  mutex_lock l(&shared_->mu);
+  internal::Benchmark::ThreadStats* thread_stats =
+      (internal::Benchmark::ThreadStats*) pthread_getspecific(thread_stats_key);
+  thread_stats->items_processed = items;
+}
+
+// Attach a free-form label to this benchmark's report line; only legal
+// after the benchmark loop has finished (STATE_STOPPED).
+void State::SetLabel(const std::string& label) {
+  CHECK_EQ(STATE_STOPPED, state_);
+  mutex_lock l(&shared_->mu);
+  shared_->label = label;
+}
+
+// Return the X argument this instance was registered with; CHECK-fails
+// if the benchmark was registered without one.
+int State::range_x() const {
+  CHECK(shared_->instance->rangeXset);
+  /*
+  <<
+      "Failed to get range_x as it was not set. Did you register your "
+      "benchmark with a range parameter?";
+  */
+  return shared_->instance->rangeX;
+}
+
+// Return the Y argument this instance was registered with; CHECK-fails
+// if the benchmark was registered without one.
+int State::range_y() const {
+  CHECK(shared_->instance->rangeYset);
+  /* <<
+      "Failed to get range_y as it was not set. Did you register your "
+      "benchmark with a range parameter?";
+  */
+  return shared_->instance->rangeY;
+}
+
+// Transition INITIAL -> STARTING -> RUNNING. The last thread to check
+// in selects the clock type; the wait-for-others rendezvous is still a
+// TODO, so threads currently proceed without synchronizing here.
+bool State::StartRunning() {
+  {
+    mutex_lock l(&shared_->mu);
+    CHECK_EQ(state_, STATE_INITIAL);
+    state_ = STATE_STARTING;
+    is_continuation_ = false;
+    CHECK_LT(shared_->starting, shared_->threads);
+    ++shared_->starting;
+    if (shared_->starting == shared_->threads) {
+      // Last thread to start.
+      clock_->InitType(
+          use_real_time ? FastClock::REAL_TIME : FastClock::CPU_TIME);
+    } else {
+      // Wait for others.
+      // TODO(dominic): semaphore!
+      // while (pthread_getsemaphore(shared_->starting_sem_) !=
+      //        shared_->threads) { }
+      //shared_->mu.Await(base::Condition(this, &State::AllStarting));
+    }
+    CHECK_EQ(state_, STATE_STARTING);
+    state_ = STATE_RUNNING;
+  }
+  // Begin the first timed interval.
+  NewInterval();
+  return true;
+}
+
+// True once every worker thread has checked in to the STARTING state;
+// intended as the predicate for the (TODO) start rendezvous.
+bool State::AllStarting() {
+  CHECK_LE(shared_->starting, shared_->threads);
+  return shared_->starting == shared_->threads;
+}
+
+// Arm the next timed interval. A fresh interval resets the iteration
+// and pause counters and records the start timestamps; a continuation
+// (interval extended by FinishInterval) keeps them accumulating.
+void State::NewInterval() {
+  stop_time_micros_ = clock_->NowMicros() + interval_micros_;
+  if (!is_continuation_) {
+#ifdef DEBUG
+    std::cout << "Starting new interval; stopping in " << interval_micros_
+              << "\n";
+#endif
+    iterations_ = 0;
+    pause_time_ = 0;
+    start_cpu_ = MyCPUUsage() + ChildrenCPUUsage();
+    start_time_ = walltime::Now();
+  } else {
+#ifdef DEBUG
+    std::cout << "Continuing interval; stopping in " << interval_micros_
+              << "\n";
+#endif
+  }
+}
+
+// Close out a timed interval: either retry with a longer interval when
+// too few iterations ran, or record a BenchmarkRunData and decide
+// whether this thread keeps running. Returns the keep-going flag for
+// KeepRunning.
+bool State::FinishInterval() {
+  // Too few iterations to be meaningful: double the interval (capped at
+  // 5 seconds) and try again from scratch.
+  if (iterations_ < FLAGS_benchmark_min_iters / FLAGS_benchmark_repetitions &&
+      interval_micros_ < 5000000) {
+    interval_micros_ *= 2;
+#ifdef DEBUG
+    std::cout << "Interval was too short; trying again for "
+              << interval_micros_ << " useconds.\n";
+#endif
+    is_continuation_ = false;
+    NewInterval();
+    return true;
+  }
+
+  internal::BenchmarkRunData data;
+  // BUGFIX: thread_index was assigned twice; once is enough.
+  data.thread_index = thread_index;
+  data.iterations = iterations_;
+
+  // Subtract explicitly paused time (and, eventually, loop overhead)
+  // from the measured wall time.
+  const double accumulated_time = walltime::Now() - start_time_;
+  const double total_overhead = 0.0;  // TODO: overhead * iterations_;
+  CHECK_LT(pause_time_, accumulated_time);
+  CHECK_LT(pause_time_ + total_overhead, accumulated_time);
+  data.real_accumulated_time =
+      accumulated_time - (pause_time_ + total_overhead);
+  data.cpu_accumulated_time = (MyCPUUsage() + ChildrenCPUUsage()) - start_cpu_;
+  total_iterations_ += iterations_;
+
+  bool keep_going = false;
+  {
+    mutex_lock l(&shared_->mu);
+    // A continuation replaces the interval it extends rather than
+    // appending a new run.
+    if (is_continuation_)
+      shared_->runs.back() = data;
+    else
+      shared_->runs.push_back(data);
+    keep_going = RunAnotherInterval();
+    if (!keep_going) {
+      ++shared_->stopping;
+      if (shared_->stopping < shared_->threads) {
+        // Other threads are still running, so continue running but without
+        // timing to present an expected background load to the other threads.
+        state_ = STATE_STOPPING;
+        keep_going = true;
+      } else {
+        state_ = STATE_STOPPED;
+      }
+    }
+  }
+
+  if (state_ == STATE_RUNNING) {
+    is_continuation_ = true;
+    NewInterval();
+  }
+  return keep_going;
+}
+
+// Decide whether this thread should time another interval: always run
+// until the minimum iteration count is met, never past the maximum, and
+// otherwise until one run per requested repetition has been recorded.
+bool State::RunAnotherInterval() const {
+  if (total_iterations_ < FLAGS_benchmark_min_iters)
+    return true;
+  if (total_iterations_ > FLAGS_benchmark_max_iters)
+    return false;
+  return static_cast<int>(shared_->runs.size()) < FLAGS_benchmark_repetitions;
+}
+
+// In STOPPING state, keep running (untimed) until every thread has
+// finished its timed intervals, then transition to STOPPED.
+bool State::MaybeStop() {
+  mutex_lock l(&shared_->mu);
+  if (shared_->stopping < shared_->threads) {
+    CHECK_EQ(state_, STATE_STOPPING);
+    return true;
+  }
+  state_ = STATE_STOPPED;
+  return false;
+}
+
+// Thread entry point: reset this thread's TLS stats, execute the user's
+// benchmark function, then merge the stats into the shared totals.
+// NOTE(review): pthread_getspecific returns NULL for threads where the
+// key was never set (the planned multithreaded path) -- confirm the key
+// is always populated before Run is invoked.
+void State::Run() {
+  internal::Benchmark::ThreadStats* thread_stats =
+      (internal::Benchmark::ThreadStats*) pthread_getspecific(thread_stats_key);
+  thread_stats->Reset();
+  shared_->instance->bm->function_(*this);
+  {
+    mutex_lock l(&shared_->mu);
+    shared_->stats.Add(*thread_stats);
+  }
+}
+
+namespace internal {
+
+// Run every benchmark instance whose name matches "spec", sending all
+// output through "reporter".
+void RunMatchingBenchmarks(const std::string& spec,
+                           BenchmarkReporter* reporter) {
+  CHECK(reporter != NULL);
+  if (spec.empty()) return;
+
+  std::vector<internal::Benchmark::Instance> benchmarks;
+  internal::Benchmark::FindBenchmarks(spec, &benchmarks);
+
+  // Determine the width of the name field using a minimum width of 10.
+  // Also determine max number of threads needed.
+  // The added widths reserve space for the "_stddev" and "/threads:XX"
+  // suffixes that may be appended to names in the report.
+  int name_field_width = 10;
+  for (const internal::Benchmark::Instance& benchmark : benchmarks) {
+    // Add width for _stddev and threads:XX
+    if (benchmark.threads > 1 && FLAGS_benchmark_repetitions > 1) {
+      name_field_width = std::max<int>(name_field_width,
+                                       benchmark.name.size() + 17);
+    } else if (benchmark.threads > 1) {
+      name_field_width = std::max<int>(name_field_width,
+                                       benchmark.name.size() + 10);
+    } else if (FLAGS_benchmark_repetitions > 1) {
+      name_field_width = std::max<int>(name_field_width,
+                                       benchmark.name.size() + 7);
+    } else {
+      name_field_width = std::max<int>(name_field_width,
+                                       benchmark.name.size());
+    }
+  }
+
+  // Print header here
+  BenchmarkContextData context;
+  context.num_cpus = NumCPUs();
+  context.mhz_per_cpu = CyclesPerSecond() / 1000000.0f;
+//  context.cpu_info = base::CompactCPUIDInfoString();
+  context.cpu_scaling_enabled = CpuScalingEnabled();
+  context.name_field_width = name_field_width;
+
+  // The reporter can veto the whole run by returning false.
+  if (reporter->ReportContext(context)) {
+    for (internal::Benchmark::Instance& benchmark : benchmarks) {
+      //std::unique_ptr<thread::ThreadPool> pool;
+      //if (benchmark.threads > 0) {
+      //  pool = new thread::ThreadPool(benchmark.threads);
+      //  pool->StartWorkers();
+      //}
+      Benchmark::RunInstance(/*pool, */benchmark, reporter);
+    }
+  }
+}
+
+// Append the names of all benchmark instances matching "spec" to
+// *benchmark_names.
+void FindMatchingBenchmarkNames(const std::string& spec,
+                                std::vector<std::string>* benchmark_names) {
+  if (spec.empty()) return;
+
+  std::vector<internal::Benchmark::Instance> benchmarks;
+  internal::Benchmark::FindBenchmarks(spec, &benchmarks);
+
+  // BUGFIX: this used std::transform writing through
+  // benchmark_names->begin(), which is undefined behavior whenever the
+  // output vector is smaller than "benchmarks" (e.g. when it is empty).
+  // Append instead.
+  for (const internal::Benchmark::Instance& b : benchmarks)
+    benchmark_names->push_back(b.name);
+}
+
+} // end namespace internal
+
+// Run the benchmarks selected by --benchmark_filter with the default
+// console reporter. An empty filter (or "all") runs everything.
+void RunSpecifiedBenchmarks() {
+  std::string spec = FLAGS_benchmark_filter;
+  if (spec.empty() || spec == "all")
+    spec = ".";  // Regexp that matches all benchmarks
+  internal::ConsoleReporter default_reporter;
+  internal::RunMatchingBenchmarks(spec, &default_reporter);
+}
+
+// Library entry point: set up globals, TLS, the walltime subsystem and
+// the loop-overhead calibration, then consume our command-line flags.
+// NOTE(review): benchmark registration (Benchmark ctors) runs from
+// static initializers before this function and already locks
+// benchmark_mutex -- confirm the mutex is valid at that point.
+void Initialize(int* argc, const char** argv) {
+  //AtomicOps_Internalx86CPUFeaturesInit();
+  pthread_mutex_init(&benchmark_mutex, nullptr);
+  pthread_key_create(&thread_stats_key, DeleteThreadStats);
+  // Stats object for the main thread; worker threads get their own via
+  // the TLS key (freed by DeleteThreadStats on thread exit).
+  thread_stats = new internal::Benchmark::ThreadStats();
+  walltime::Initialize();
+  internal::Benchmark::MeasureOverhead();
+  internal::ParseCommandLineFlags(argc, argv);
+}
+
+
+} // end namespace benchmark
diff --git a/src/colorprint.cc b/src/colorprint.cc
new file mode 100644
index 0000000..42d69cb
--- /dev/null
+++ b/src/colorprint.cc
@@ -0,0 +1,82 @@
+#include "colorprint.h"
+
+#include <stdarg.h>
+
+#include "commandlineflags.h"
+
+DECLARE_bool(color_print);
+
+namespace {
+// Platform-specific representation of a console color: a Win32 text
+// attribute on Windows, an ANSI color-code digit string elsewhere.
+#ifdef OS_WINDOWS
+typedef WORD PlatformColorCode;
+#else
+typedef const char* PlatformColorCode;
+#endif
+
+// Map a LogColor to its platform color code; returns 0 (Windows) or
+// NULL (ANSI) for COLOR_DEFAULT/unknown, meaning "no color".
+PlatformColorCode GetPlatformColorCode(LogColor color) {
+#ifdef OS_WINDOWS
+  switch (color) {
+    case COLOR_RED: return FOREGROUND_RED;
+    case COLOR_GREEN: return FOREGROUND_GREEN;
+    case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN;
+    case COLOR_BLUE: return FOREGROUND_BLUE;
+    case COLOR_MAGENTA: return FOREGROUND_BLUE | FOREGROUND_RED;
+    case COLOR_CYAN: return FOREGROUND_BLUE | FOREGROUND_GREEN;
+    case COLOR_WHITE:  // fall through to default
+    default: return 0;
+  }
+#else
+  // ANSI SGR color digit, used as "\033[0;3<digit>m" by ColorPrintf.
+  switch (color) {
+    case COLOR_RED: return "1";
+    case COLOR_GREEN: return "2";
+    case COLOR_YELLOW: return "3";
+    case COLOR_BLUE: return "4";
+    case COLOR_MAGENTA: return "5";
+    case COLOR_CYAN: return "6";
+    case COLOR_WHITE: return "7";
+    default: return NULL;
+  };
+#endif
+}
+} // end namespace
+
+// printf-style output in the given color; falls back to plain output
+// when --color_print is false or the color has no platform code.
+void ColorPrintf(LogColor color, const char* fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+
+  if (!FLAGS_color_print) {
+    vprintf(fmt, args);
+    va_end(args);
+    return;
+  }
+
+#ifdef OS_WINDOWS
+  const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
+
+  // Gets the current text color.
+  CONSOLE_SCREEN_BUFFER_INFO buffer_info;
+  GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
+  const WORD old_color_attrs = buffer_info.wAttributes;
+
+  // We need to flush the stream buffers into the console before each
+  // SetConsoleTextAttribute call lest it affect the text that is already
+  // printed but has not yet reached the console.
+  fflush(stdout);
+  SetConsoleTextAttribute(stdout_handle,
+                          GetPlatformColorCode(color) | FOREGROUND_INTENSITY);
+  vprintf(fmt, args);
+
+  fflush(stdout);
+  // Restores the text color.
+  SetConsoleTextAttribute(stdout_handle, old_color_attrs);
+#else
+  const char* color_code = GetPlatformColorCode(color);
+  if (color_code) {
+    fprintf(stdout, "\033[0;3%sm", color_code);
+    vprintf(fmt, args);
+    printf("\033[m");  // Resets the terminal to default.
+  } else {
+    // BUGFIX: no color escape was emitted, so don't emit the reset
+    // sequence either (previously the reset was printed unconditionally).
+    vprintf(fmt, args);
+  }
+#endif
+  va_end(args);
+}
+
+
diff --git a/src/colorprint.h b/src/colorprint.h
new file mode 100644
index 0000000..5789c2e
--- /dev/null
+++ b/src/colorprint.h
@@ -0,0 +1,17 @@
+#ifndef BENCHMARK_COLORPRINT_H_
+#define BENCHMARK_COLORPRINT_H_
+
+// Colors accepted by ColorPrintf; COLOR_DEFAULT means "no coloring".
+enum LogColor {
+  COLOR_DEFAULT,
+  COLOR_RED,
+  COLOR_GREEN,
+  COLOR_YELLOW,
+  COLOR_BLUE,
+  COLOR_MAGENTA,
+  COLOR_CYAN,
+  COLOR_WHITE
+};
+
+void ColorPrintf(LogColor color, const char* fmt, ...);
+
+#endif // BENCHMARK_COLORPRINT_H_
diff --git a/src/commandlineflags.cc b/src/commandlineflags.cc
new file mode 100644
index 0000000..331b8ff
--- /dev/null
+++ b/src/commandlineflags.cc
@@ -0,0 +1,213 @@
+#include "commandlineflags.h"
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <iostream>
+#include <limits>
+
+namespace benchmark {
+// Parses 'str' for a 32-bit signed integer. If successful, writes
+// the result to *value and returns true; otherwise leaves *value
+// unchanged and returns false.
+bool ParseInt32(const std::string& src_text, const char* str, int32_t* value) {
+  // Parses the string as a base-10 integer.
+  char* end = NULL;
+  const long long_value = strtol(str, &end, 10);  // NOLINT
+
+  // Has strtol() consumed all characters in the string?
+  if (*end != '\0') {
+    // No - an invalid character was encountered.
+    std::cerr << src_text << " is expected to be a 32-bit integer, "
+              << "but actually has value \"" << str << "\".\n";
+    return false;
+  }
+
+  // Is the parsed value in the range of an Int32?
+  const int32_t result = static_cast<int32_t>(long_value);
+  if (long_value == std::numeric_limits<long>::max() ||
+      long_value == std::numeric_limits<long>::min() ||
+      // The parsed value overflows as a long.  (strtol() returns
+      // LONG_MAX or LONG_MIN when the input overflows.)
+      result != long_value
+      // The parsed value overflows as an Int32.
+      ) {
+    std::cerr << src_text << " is expected to be a 32-bit integer, "
+              << "but actually has value \"" << str << "\", "
+              << "which overflows.\n";
+    return false;
+  }
+
+  *value = result;
+  return true;
+}
+
+// Parses 'str' for a double. If successful, writes the result to *value and
+// returns true; otherwise leaves *value unchanged and returns false.
+bool ParseDouble(const std::string& src_text, const char* str, double* value) {
+  // Parses the string as a decimal floating-point number.
+  char* end = NULL;
+  const double double_value = strtod(str, &end);  // NOLINT
+
+  // Has strtod() consumed all characters in the string?
+  if (*end != '\0') {
+    // No - an invalid character was encountered.
+    std::cerr << src_text << " is expected to be a double, "
+              << "but actually has value \"" << str << "\".\n";
+    return false;
+  }
+
+  *value = double_value;
+  return true;
+}
+
+
+// Returns the value of the environment variable 'name', or NULL when it is
+// not set.  Thin portability wrapper around getenv().
+inline const char* GetEnv(const char* name) {
+#if GTEST_OS_WINDOWS_MOBILE
+  // NOTE(review): GTEST_OS_WINDOWS_MOBILE is a Google Test macro that is
+  // never defined in this project, so this branch is currently dead code --
+  // consider renaming it to a project-local OS_* macro.
+  // We are on Windows CE, which has no environment variables.
+  return NULL;
+#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
+  // Environment variables which we programmatically clear will be set to the
+  // empty string rather than unset (NULL). Handle that case.
+  const char* const env = getenv(name);
+  return (env != NULL && env[0] != '\0') ? env : NULL;
+#else
+  return getenv(name);
+#endif
+}
+
+// Returns the name of the environment variable corresponding to the
+// given flag. For example, FlagToEnvVar("foo") will return
+// "BENCHMARK_FOO" in the open-source version.
+static std::string FlagToEnvVar(const char* flag) {
+  const std::string flag_str(flag);
+
+  std::string env_var;
+  for (size_t i = 0; i != flag_str.length(); ++i)
+    // operator[] instead of c_str()[i]; cast because toupper returns int.
+    env_var += static_cast<char>(::toupper(flag_str[i]));
+
+  return "BENCHMARK_" + env_var;
+}
+
+// Reads and returns the Boolean environment variable corresponding to
+// the given flag; if it's not set, returns default_value.
+//
+// The value is considered true iff it's not "0".
+bool BoolFromEnv(const char* flag, bool default_value) {
+  const std::string env_var = FlagToEnvVar(flag);
+  const char* const string_value = GetEnv(env_var.c_str());
+  if (string_value == NULL) return default_value;
+  return strcmp(string_value, "0") != 0;
+}
+
+// Reads and returns a 32-bit integer stored in the environment
+// variable corresponding to the given flag; if it isn't set or
+// doesn't represent a valid 32-bit integer, returns default_value.
+int32_t Int32FromEnv(const char* flag, int32_t default_value) {
+  const std::string env_var = FlagToEnvVar(flag);
+  const char* const string_value = GetEnv(env_var.c_str());
+  if (string_value == NULL) {
+    // The environment variable is not set.
+    return default_value;
+  }
+
+  int32_t result = default_value;
+  if (!ParseInt32(std::string("Environment variable ") + env_var,
+                  string_value, &result)) {
+    // Report the fallback on the same stream (std::cerr) as ParseInt32's
+    // diagnostic so the two messages cannot be interleaved across streams.
+    std::cerr << "The default value " << default_value << " is used.\n";
+    return default_value;
+  }
+
+  return result;
+}
+
+// Reads and returns the string environment variable corresponding to
+// the given flag; if it's not set, returns default_value.
+const char* StringFromEnv(const char* flag, const char* default_value) {
+  const std::string env_var = FlagToEnvVar(flag);
+  const char* const value = GetEnv(env_var.c_str());
+  if (value == NULL) return default_value;
+  return value;
+}
+
+// Parses a string as a command line flag. The string should have
+// the format "--flag=value". When def_optional is true, the "=value"
+// part can be omitted.
+//
+// Returns the value of the flag, or NULL if the parsing failed.
+const char* ParseFlagValue(const char* str,
+                           const char* flag,
+                           bool def_optional) {
+  // str and flag must not be NULL.
+  if (str == NULL || flag == NULL) return NULL;
+
+  // The flag must start with "--".
+  const std::string flag_str = std::string("--") + std::string(flag);
+  const size_t flag_len = flag_str.length();
+  if (strncmp(str, flag_str.c_str(), flag_len) != 0) return NULL;
+
+  // Skips the flag name.
+  const char* flag_end = str + flag_len;
+
+  // When def_optional is true, it's OK to not have a "=value" part.
+  if (def_optional && (flag_end[0] == '\0'))
+    return flag_end;
+
+  // If def_optional is true and there are more characters after the
+  // flag name, or if def_optional is false, there must be a '=' after
+  // the flag name.
+  if (flag_end[0] != '=') return NULL;
+
+  // Returns the string after "=".
+  return flag_end + 1;
+}
+
+// Parses 'str' ("--flag" or "--flag=value") as a bool flag.  The value is
+// true unless it starts with '0', 'f', or 'F'.  On success stores the value
+// in *value and returns true; on failure leaves *value unchanged.
+bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
+  // Gets the value of the flag as a string.
+  const char* const value_str = ParseFlagValue(str, flag, true);
+
+  // Aborts if the parsing failed.
+  if (value_str == NULL) return false;
+
+  // Converts the string value to a bool.
+  *value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F');
+  return true;
+}
+
+// Parses 'str' ("--flag=value") as an Int32 flag; overflow and garbage are
+// reported by ParseInt32, which leaves *value unchanged on failure.
+bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
+  // Gets the value of the flag as a string.
+  const char* const value_str = ParseFlagValue(str, flag, false);
+
+  // Aborts if the parsing failed.
+  if (value_str == NULL) return false;
+
+  // Sets *value to the value of the flag.
+  return ParseInt32(std::string("The value of flag --") + flag,
+                    value_str, value);
+}
+
+// Parses 'str' ("--flag=value") as a double flag; leaves *value unchanged
+// on failure.
+bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
+  // Gets the value of the flag as a string.
+  const char* const value_str = ParseFlagValue(str, flag, false);
+
+  // Aborts if the parsing failed.
+  if (value_str == NULL) return false;
+
+  // Sets *value to the value of the flag.
+  return ParseDouble(std::string("The value of flag --") + flag,
+                     value_str, value);
+}
+
+// Parses 'str' ("--flag=value") as a string flag; leaves *value unchanged
+// on failure.
+bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
+  // Gets the value of the flag as a string.
+  const char* const value_str = ParseFlagValue(str, flag, false);
+
+  // Aborts if the parsing failed.
+  if (value_str == NULL) return false;
+
+  *value = value_str;
+  return true;
+}
+
+// Returns true if 'str' names the flag 'flag', with or without a value.
+bool IsFlag(const char* str, const char* flag) {
+  return (ParseFlagValue(str, flag, true) != NULL);
+}
+} // end namespace benchmark
diff --git a/src/commandlineflags.h b/src/commandlineflags.h
new file mode 100644
index 0000000..056d9fc
--- /dev/null
+++ b/src/commandlineflags.h
@@ -0,0 +1,79 @@
+#ifndef BENCHMARK_COMMANDLINEFLAGS_H_
+#define BENCHMARK_COMMANDLINEFLAGS_H_
+
+#include <stdint.h>
+
+#include <string>
+
+// Macro for referencing flags.
+#define FLAG(name) FLAGS_##name
+
+// Macros for declaring flags.
+#define DECLARE_bool(name) extern bool FLAG(name)
+#define DECLARE_int32(name) extern int32_t FLAG(name)
+#define DECLARE_int64(name) extern int64_t FLAG(name)
+#define DECLARE_double(name) extern double FLAG(name)
+#define DECLARE_string(name) extern std::string FLAG(name)
+
+// Macros for defining flags.
+#define DEFINE_bool(name, default_val, doc) bool FLAG(name) = (default_val)
+#define DEFINE_int32(name, default_val, doc) int32_t FLAG(name) = (default_val)
+#define DEFINE_int64(name, default_val, doc) int64_t FLAG(name) = (default_val)
+#define DEFINE_double(name, default_val, doc) double FLAG(name) = (default_val)
+#define DEFINE_string(name, default_val, doc) \
+ std::string FLAG(name) = (default_val)
+
+namespace benchmark {
+
+// Parses 'str' for a 32-bit signed integer. If successful, writes the result
+// to *value and returns true; otherwise leaves *value unchanged and returns
+// false.
+bool ParseInt32(const std::string& src_text, const char* str, int32_t* value);
+
+// Parses a bool/Int32/string from the environment variable
+// corresponding to the given benchmark flag.
+bool BoolFromEnv(const char* flag, bool default_val);
+int32_t Int32FromEnv(const char* flag, int32_t default_val);
+double DoubleFromEnv(const char* flag, double default_val);
+const char* StringFromEnv(const char* flag, const char* default_val);
+
+// Parses a string for a bool flag, in the form of either
+// "--flag=value" or "--flag".
+//
+// In the former case, the value is taken as true as long as it does
+// not start with '0', 'f', or 'F'.
+//
+// In the latter case, the value is taken as true.
+//
+// On success, stores the value of the flag in *value, and returns
+// true. On failure, returns false without changing *value.
+bool ParseBoolFlag(const char* str, const char* flag, bool* value);
+
+// Parses a string for an Int32 flag, in the form of
+// "--flag=value".
+//
+// On success, stores the value of the flag in *value, and returns
+// true. On failure, returns false without changing *value.
+bool ParseInt32Flag(const char* str, const char* flag, int32_t* value);
+
+// Parses a string for a Double flag, in the form of
+// "--flag=value".
+//
+// On success, stores the value of the flag in *value, and returns
+// true. On failure, returns false without changing *value.
+bool ParseDoubleFlag(const char* str, const char* flag, double* value);
+
+// Parses a string for a string flag, in the form of
+// "--flag=value".
+//
+// On success, stores the value of the flag in *value, and returns
+// true. On failure, returns false without changing *value.
+bool ParseStringFlag(const char* str, const char* flag, std::string* value);
+
+// Returns true if the string matches the flag.
+bool IsFlag(const char* str, const char* flag);
+
+} // end namespace benchmark
+
+#endif // BENCHMARK_COMMANDLINEFLAGS_H_
+
diff --git a/src/cycleclock.h b/src/cycleclock.h
new file mode 100644
index 0000000..d5ba314
--- /dev/null
+++ b/src/cycleclock.h
@@ -0,0 +1,129 @@
+// ----------------------------------------------------------------------
+// CycleClock
+// A CycleClock tells you the current time in Cycles. The "time"
+// is actually time since power-on. This is like time() but doesn't
+// involve a system call and is much more precise.
+//
+// NOTE: Not all cpu/platform/kernel combinations guarantee that this
+// clock increments at a constant rate or is synchronized across all logical
+// cpus in a system.
+//
+// If you need the above guarantees, please consider using a different
+// API. There are efforts to provide an interface which provides a millisecond
+// granularity and implemented as a memory read. A memory read is generally
+// cheaper than the CycleClock for many architectures.
+//
+// Also, in some out of order CPU implementations, the CycleClock is not
+// serializing. So if you're trying to count at cycles granularity, your
+// data might be inaccurate due to out of order instruction execution.
+// ----------------------------------------------------------------------
+
+#ifndef BENCHMARK_CYCLECLOCK_H_
+#define BENCHMARK_CYCLECLOCK_H_
+
+#include <stdint.h>
+
+#if defined(OS_MACOSX)
+# include <mach/mach_time.h>
+#endif
+// For MSVC, we want to use '_asm rdtsc' when possible (since it works
+// with even ancient MSVC compilers), and when not possible the
+// __rdtsc intrinsic, declared in <intrin.h>. Unfortunately, in some
+// environments, <windows.h> and <intrin.h> have conflicting
+// declarations of some other intrinsics, breaking compilation.
+// Therefore, we simply declare __rdtsc ourselves. See also
+// http://connect.microsoft.com/VisualStudio/feedback/details/262047
+#if defined(COMPILER_MSVC) && !defined(_M_IX86)
+extern "C" uint64_t __rdtsc();
+#pragma intrinsic(__rdtsc)
+#endif
+#include <sys/time.h>
+
+// NOTE: only i386 and x86_64 have been well tested.
+// PPC, sparc, alpha, and ia64 are based on
+// http://peter.kuscsik.com/wordpress/?p=14
+// with modifications by m3b. See also
+// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h
+struct CycleClock {
+  // This should return the number of cycles since power-on.  Thread-safe.
+  static inline int64_t Now() {
+#if defined(OS_MACOSX)
+    // this goes at the top because we need ALL Macs, regardless of
+    // architecture, to return the number of "mach time units" that
+    // have passed since startup.  See sysinfo.cc where
+    // InitializeSystemInfo() sets the supposed cpu clock frequency of
+    // macs to the number of mach time units per second, not actual
+    // CPU clock frequency (which can change in the face of CPU
+    // frequency scaling).  Also note that when the Mac sleeps, this
+    // counter pauses; it does not continue counting, nor does it
+    // reset to zero.
+    return mach_absolute_time();
+#elif defined(__i386__)
+    int64_t ret;
+    __asm__ volatile ("rdtsc" : "=A" (ret) );
+    return ret;
+#elif defined(__x86_64__) || defined(__amd64__)
+    uint64_t low, high;
+    __asm__ volatile ("rdtsc" : "=a" (low), "=d" (high));
+    return (high << 32) | low;
+#elif defined(__powerpc__) || defined(__ppc__)
+    // This returns a time-base, which is not always precisely a cycle-count.
+    int64_t tbl, tbu0, tbu1;
+    asm("mftbu %0" : "=r" (tbu0));
+    asm("mftb %0" : "=r" (tbl));
+    asm("mftbu %0" : "=r" (tbu1));
+    // Zero out tbl if the upper half ticked over between the two mftbu
+    // reads.  (Fixed: was 'static_cast<int64>', but no type named 'int64'
+    // is declared here -- int64_t is the type used throughout.)
+    tbl &= -static_cast<int64_t>(tbu0 == tbu1);
+    // high 32 bits in tbu1; low 32 bits in tbl  (tbu0 is garbage)
+    return (tbu1 << 32) | tbl;
+#elif defined(__sparc__)
+    int64_t tick;
+    asm(".byte 0x83, 0x41, 0x00, 0x00");
+    asm("mov %%g1, %0" : "=r" (tick));
+    return tick;
+#elif defined(__ia64__)
+    int64_t itc;
+    asm("mov %0 = ar.itc" : "=r" (itc));
+    return itc;
+#elif defined(COMPILER_MSVC) && defined(_M_IX86)
+    // Older MSVC compilers (like 7.x) don't seem to support the
+    // __rdtsc intrinsic properly, so I prefer to use _asm instead
+    // when I know it will work.  Otherwise, I'll use __rdtsc and hope
+    // the code is being compiled with a non-ancient compiler.
+    _asm rdtsc
+#elif defined(COMPILER_MSVC)
+    return __rdtsc();
+#elif defined(ARMV3)
+#if defined(ARMV6)  // V6 is the earliest arch that has a standard cyclecount
+    uint32_t pmccntr;
+    uint32_t pmuseren;
+    uint32_t pmcntenset;
+    // Read the user mode perf monitor counter access permissions.
+    asm("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren));
+    if (pmuseren & 1) {  // Allows reading perfmon counters for user mode code.
+      asm("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset));
+      if (pmcntenset & 0x80000000ul) {  // Is it counting?
+        asm("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr));
+        // The counter is set up to count every 64th cycle.
+        // (Fixed: was 'static_cast<int64>'; the declared type is int64_t.)
+        return static_cast<int64_t>(pmccntr) * 64;  // Should optimize to << 6
+      }
+    }
+#endif
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+#elif defined(__mips__)
+    // mips apparently only allows rdtsc for superusers, so we fall
+    // back to gettimeofday.  It's possible clock_gettime would be better.
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+#else
+// The soft failover to a generic implementation is automatic only for ARM.
+// For other platforms the developer is expected to make an attempt to create
+// a fast implementation and use generic version if nothing better is available.
+#error You need to define CycleTimer for your OS and CPU
+#endif
+  }
+};
+
+#endif // BENCHMARK_CYCLECLOCK_H_
diff --git a/src/macros.h b/src/macros.h
new file mode 100644
index 0000000..b470328
--- /dev/null
+++ b/src/macros.h
@@ -0,0 +1,110 @@
+#ifndef BENCHMARK_MACROS_H_
+#define BENCHMARK_MACROS_H_
+
+#include <assert.h>
+
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ TypeName(const TypeName&); \
+ void operator=(const TypeName&);
+
+// The arraysize(arr) macro returns the # of elements in an array arr.
+// The expression is a compile-time constant, and therefore can be
+// used in defining new arrays, for example. If you use arraysize on
+// a pointer by mistake, you will get a compile-time error.
+//
+// One caveat is that, for C++03, arraysize() doesn't accept any array of
+// an anonymous type or a type defined inside a function. In these rare
+// cases, you have to use the unsafe ARRAYSIZE() macro below. This is
+// due to a limitation in C++03's template system. The limitation has
+// been removed in C++11.
+
+// This template function declaration is used in defining arraysize.
+// Note that the function doesn't need an implementation, as we only
+// use its type.
+// NOTE(review): this uses size_t but the header does not include
+// <stddef.h>; it currently relies on another header pulling it in first.
+template <typename T, size_t N>
+char (&ArraySizeHelper(T (&array)[N]))[N];
+
+// That gcc wants both of these prototypes seems mysterious. VC, for
+// its part, can't decide which to use (another mystery). Matching of
+// template overloads: the final frontier.
+#ifndef COMPILER_MSVC
+template <typename T, size_t N>
+char (&ArraySizeHelper(const T (&array)[N]))[N];
+#endif
+
+#define arraysize(array) (sizeof(ArraySizeHelper(array)))
+
+// The STATIC_ASSERT macro can be used to verify that a compile time
+// expression is true. For example, you could use it to verify the
+// size of a static array:
+//
+// STATIC_ASSERT(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES,
+// content_type_names_incorrect_size);
+//
+// or to make sure a struct is smaller than a certain size:
+//
+// STATIC_ASSERT(sizeof(foo) < 128, foo_too_large);
+//
+// The second argument to the macro is the name of the variable. If
+// the expression is false, most compilers will issue a warning/error
+// containing the name of the variable.
+
+// Dummy template used by STATIC_ASSERT below: instantiating it inside a
+// typedef whose array size is (expr ? 1 : -1) turns a false compile-time
+// expression into an invalid (negative-size) array type.
+template <bool>
+struct StaticAssert {
+};
+
+#define STATIC_ASSERT(expr, msg) \
+  typedef StaticAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
+
+// Implementation details of STATIC_ASSERT:
+//
+// - STATIC_ASSERT works by defining an array type that has -1
+// elements (and thus is invalid) when the expression is false.
+//
+// - The simpler definition
+//
+// #define STATIC_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
+//
+// does not work, as gcc supports variable-length arrays whose sizes
+// are determined at run-time (this is gcc's extension and not part
+// of the C++ standard). As a result, gcc fails to reject the
+// following code with the simple definition:
+//
+// int foo;
+// STATIC_ASSERT(foo, msg); // not supposed to compile as foo is
+// // not a compile-time constant.
+//
+// - By using the type StaticAssert<(bool(expr))>, we ensures that
+// expr is a compile-time constant. (Template arguments must be
+// determined at compile-time.)
+//
+// - The outer parentheses in StaticAssert<(bool(expr))> are necessary
+// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
+//
+// StaticAssert<bool(expr)>
+//
+// instead, these compilers will refuse to compile
+//
+// STATIC_ASSERT(5 > 0, some_message);
+//
+// (They seem to think the ">" in "5 > 0" marks the end of the
+// template argument list.)
+//
+// - The array size is (bool(expr) ? 1 : -1), instead of simply
+//
+// ((expr) ? 1 : -1).
+//
+// This is to avoid running into a bug in MS VC 7.1, which
+// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
+
+#define CHECK(b) do { if (!(b)) assert(false); } while(0)
+#define CHECK_EQ(a, b) CHECK((a) == (b))
+#define CHECK_GE(a, b) CHECK((a) >= (b))
+#define CHECK_LE(a, b) CHECK((a) <= (b))
+#define CHECK_GT(a, b) CHECK((a) > (b))
+#define CHECK_LT(a, b) CHECK((a) < (b))
+
+
+#define ATTRIBUTE_UNUSED __attribute__ ((unused))
+
+#endif // BENCHMARK_MACROS_H_
diff --git a/src/mutex_lock.h b/src/mutex_lock.h
new file mode 100644
index 0000000..40f0fde
--- /dev/null
+++ b/src/mutex_lock.h
@@ -0,0 +1,20 @@
+#ifndef BENCHMARK_MUTEX_LOCK_H_
+#define BENCHMARK_MUTEX_LOCK_H_
+
+#include <pthread.h>
+
+// Scoped lock guard: acquires *mu on construction and releases it when the
+// guard goes out of scope (RAII), so early returns cannot leak the lock.
+class mutex_lock {
+ public:
+  explicit mutex_lock(pthread_mutex_t* mu) : mu_(mu) {
+    pthread_mutex_lock(mu_);
+  }
+
+  ~mutex_lock() {
+    pthread_mutex_unlock(mu_);
+  }
+
+ private:
+  // Guards must not be copied: the copy's destructor would unlock the
+  // mutex a second time (undefined behavior).  Declared but not defined.
+  mutex_lock(const mutex_lock&);
+  void operator=(const mutex_lock&);
+
+  pthread_mutex_t* mu_;
+};
+
+#endif // BENCHMARK_MUTEX_LOCK_H_
diff --git a/src/port.h b/src/port.h
new file mode 100644
index 0000000..7d8fe1c
--- /dev/null
+++ b/src/port.h
@@ -0,0 +1,8 @@
+#ifndef BENCHMARK_PORT_H_
+#define BENCHMARK_PORT_H_
+
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ TypeName(const TypeName&); \
+ void operator=(const TypeName&);
+
+#endif // BENCHMARK_PORT_H_
diff --git a/src/sleep.cc b/src/sleep.cc
new file mode 100644
index 0000000..8229291
--- /dev/null
+++ b/src/sleep.cc
@@ -0,0 +1,42 @@
+#include "sleep.h"
+
+#include <time.h>
+#include <errno.h>
+
+#ifdef OS_WINDOWS
+
+// Window's _sleep takes milliseconds argument.
+// NOTE(review): the Win32 API spells this Sleep() (from <windows.h>);
+// _sleep() is a deprecated CRT alias -- confirm it exists on the
+// targeted toolchains.
+void SleepForMilliseconds(int milliseconds) {
+  _sleep(milliseconds);
+}
+// Sleeps for (approximately) the given number of seconds, with
+// millisecond resolution.
+void SleepForSeconds(double seconds) {
+  SleepForMilliseconds(static_cast<int>(seconds * 1000));
+}
+
+#else // OS_WINDOWS
+
+static const int64_t kNumMillisPerSecond = 1000LL;
+static const int64_t kNumMicrosPerMilli = 1000LL;
+static const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL;
+static const int64_t kNumNanosPerMicro = 1000LL;
+
+// Sleeps for at least the given number of microseconds, resuming the sleep
+// after signal interruptions.
+void SleepForMicroseconds(int64_t microseconds) {
+  struct timespec sleep_time;
+  sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;
+  sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro;
+  // nanosleep writes the remaining interval back into sleep_time when it is
+  // interrupted, so retrying on EINTR waits out the full duration.
+  while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
+    ;  // Ignore signals and wait for the full interval to elapse.
+}
+
+// Millisecond convenience wrapper around SleepForMicroseconds.
+void SleepForMilliseconds(int milliseconds) {
+  SleepForMicroseconds(static_cast<int64_t>(milliseconds) * kNumMicrosPerMilli);
+}
+
+// Fractional-second convenience wrapper around SleepForMicroseconds.
+void SleepForSeconds(double seconds) {
+  SleepForMicroseconds(static_cast<int64_t>(seconds * kNumMicrosPerSecond));
+}
+
+#endif // OS_WINDOWS
+
+
+
diff --git a/src/sleep.h b/src/sleep.h
new file mode 100644
index 0000000..35b4263
--- /dev/null
+++ b/src/sleep.h
@@ -0,0 +1,10 @@
+#ifndef BENCHMARK_SLEEP_H_
+#define BENCHMARK_SLEEP_H_
+
+#include <stdint.h>
+
+void SleepForMicroseconds(int64_t microseconds);
+void SleepForMilliseconds(int milliseconds);
+void SleepForSeconds(double seconds);
+
+#endif // BENCHMARK_SLEEP_H_
diff --git a/src/stat.h b/src/stat.h
new file mode 100644
index 0000000..b121d47
--- /dev/null
+++ b/src/stat.h
@@ -0,0 +1,306 @@
+#ifndef BENCHMARK_STAT_H_
+#define BENCHMARK_STAT_H_
+
+#include <math.h>
+#include <iostream>
+#include <limits>
+
+template <typename VType, typename NumType>
+class Stat1;
+
+template <typename VType, typename NumType>
+class Stat1MinMax;
+
+typedef Stat1<float, float> Stat1_f;
+typedef Stat1<double, double> Stat1_d;
+typedef Stat1MinMax<float, float> Stat1MinMax_f;
+typedef Stat1MinMax<double, double> Stat1MinMax_d;
+
+template <typename VType> class Vector2;
+template <typename VType> class Vector3;
+template <typename VType> class Vector4;
+
+// Stat1<VType, NumType> keeps incremental weighted statistics of a sample
+// set (total weight, weighted sum, and weighted sum of squares), from which
+// the mean and the population standard deviation can be derived.  VType is
+// the value type, NumType the weight type.
+template <typename VType, typename NumType>
+class Stat1 {
+ public:
+  typedef Stat1<VType, NumType> Self;
+
+  Stat1() {
+    Clear();
+  }
+  // Resets all accumulators to the value-initialized (zero) state.
+  void Clear() {
+    numsamples_ = NumType();
+    sum_squares_ = sum_ = VType();
+  }
+  // Create a sample of value dat and weight 1
+  explicit Stat1(const VType &dat) {
+    sum_ = dat;
+    sum_squares_ = Sqr(dat);
+    numsamples_ = 1;
+  }
+  // Create statistics for all the samples between begin (included)
+  // and end(excluded)
+  explicit Stat1(const VType *begin, const VType *end) {
+    Clear();
+    for ( const VType *item = begin; item < end; ++item ) {
+      (*this) += Stat1(*item);
+    }
+  }
+  // Create a sample of value dat and weight w
+  Stat1(const VType &dat, const NumType &w) {
+    sum_ = w * dat;
+    sum_squares_ = w * Sqr(dat);
+    numsamples_ = w;
+  }
+  // Copy operator
+  Stat1(const Self &stat) {
+    sum_ = stat.sum_;
+    sum_squares_ = stat.sum_squares_;
+    numsamples_ = stat.numsamples_;
+  }
+
+  inline Self &operator =(const Self &stat) {
+    sum_ = stat.sum_;
+    sum_squares_ = stat.sum_squares_;
+    numsamples_ = stat.numsamples_;
+    return (*this);
+  }
+  // Merge statistics from two sample sets.
+  inline Self &operator +=(const Self &stat) {
+    sum_ += stat.sum_;
+    sum_squares_+= stat.sum_squares_;
+    numsamples_ += stat.numsamples_;
+    return (*this);
+  }
+  // The operation opposite to +=
+  inline Self &operator -=(const Self &stat) {
+    sum_ -= stat.sum_;
+    sum_squares_-= stat.sum_squares_;
+    numsamples_ -= stat.numsamples_;
+    return (*this);
+  }
+  // Multiply the weight of the set of samples by a factor k
+  inline Self &operator *=(const VType &k) {
+    sum_ *= k;
+    sum_squares_*= k;
+    numsamples_ *= k;
+    return (*this);
+  }
+  // Merge statistics from two sample sets.
+  inline Self operator + (const Self &stat) const {
+    return Self(*this) += stat;
+  }
+  // The operation opposite to +
+  inline Self operator - (const Self &stat) const {
+    return Self(*this) -= stat;
+  }
+  // Multiply the weight of the set of samples by a factor k
+  inline Self operator * (const VType &k) const {
+    return Self(*this) *= k;
+  }
+  // Return the total weight of this sample set
+  NumType NumSamples() const {
+    return numsamples_;
+  }
+  // Return the sum of this sample set
+  VType Sum() const {
+    return sum_;
+  }
+  // Return the mean of this sample set; a value-initialized VType (zero)
+  // when the set is empty.
+  VType Mean() const {
+    if (numsamples_ == 0) return VType();
+    return sum_ * (1.0 / numsamples_);
+  }
+  // Return the mean of this sample set and compute the standard deviation at
+  // the same time.  The deviation is the population (biased) form,
+  // sqrt(E[x^2] - E[x]^2).
+  VType Mean(VType *stddev) const {
+    if (numsamples_ == 0) return VType();
+    VType mean = sum_ * (1.0 / numsamples_);
+    if (stddev) {
+      VType avg_squares = sum_squares_ * (1.0 / numsamples_);
+      *stddev = Sqrt(avg_squares - Sqr(mean));
+    }
+    return mean;
+  }
+  // Return the (population) standard deviation of the sample set
+  VType StdDev() const {
+    if (numsamples_ == 0) return VType();
+    VType mean = Mean();
+    VType avg_squares = sum_squares_ * (1.0 / numsamples_);
+    return Sqrt(avg_squares - Sqr(mean));
+  }
+ private:
+  // Let i be the index of the samples provided (using +=)
+  // and weight[i],value[i] be the data of sample #i
+  // then the variables have the following meaning:
+  NumType numsamples_;  // sum of weight[i];
+  VType sum_;           // sum of weight[i]*value[i];
+  VType sum_squares_;   // sum of weight[i]*value[i]^2;
+
+  // Template function used to square a number.
+  // For a vector we square all components
+  template <typename SType>
+  static inline SType Sqr(const SType &dat) {
+    return dat * dat;
+  }
+  template <typename SType>
+  static inline Vector2<SType> Sqr(const Vector2<SType> &dat) {
+    return dat.MulComponents(dat);
+  }
+  template <typename SType>
+  static inline Vector3<SType> Sqr(const Vector3<SType> &dat) {
+    return dat.MulComponents(dat);
+  }
+  template <typename SType>
+  static inline Vector4<SType> Sqr(const Vector4<SType> &dat) {
+    return dat.MulComponents(dat);
+  }
+
+  // Template function used to take the square root of a number.
+  // For a vector we square all components
+  template <typename SType>
+  static inline SType Sqrt(const SType &dat) {
+    // Avoid NaN due to imprecision in the calculations
+    if ( dat < 0 )
+      return 0;
+    return sqrt(dat);
+  }
+  template <typename SType>
+  static inline Vector2<SType> Sqrt(const Vector2<SType> &dat) {
+    // Avoid NaN due to imprecision in the calculations
+    return Max(dat, Vector2<SType>()).Sqrt();
+  }
+  template <typename SType>
+  static inline Vector3<SType> Sqrt(const Vector3<SType> &dat) {
+    // Avoid NaN due to imprecision in the calculations
+    return Max(dat, Vector3<SType>()).Sqrt();
+  }
+  template <typename SType>
+  static inline Vector4<SType> Sqrt(const Vector4<SType> &dat) {
+    // Avoid NaN due to imprecision in the calculations
+    return Max(dat, Vector4<SType>()).Sqrt();
+  }
+};
+
+// Prints a Stat1 as "{ avg = A std = S nsamples = N}" for logs/debugging.
+template <typename VType, typename NumType>
+inline std::ostream& operator<<(std::ostream& out,
+                                const Stat1<VType, NumType>& s) {
+  out << "{ avg = " << s.Mean()
+      << " std = " << s.StdDev()
+      << " nsamples = " << s.NumSamples() << "}";
+  return out;
+}
+
+
+// Stat1MinMax: same as Stat1, but it also
+// keeps the Min and Max values; the "-"
+// operator is disabled because it cannot be implemented
+// efficiently
+template <typename VType, typename NumType>
+class Stat1MinMax : public Stat1<VType, NumType> {
+ public:
+  typedef Stat1MinMax<VType, NumType> Self;
+
+  Stat1MinMax() {
+    Clear();
+  }
+  // Resets the accumulators.  min_/max_ are seeded with the most extreme
+  // representable values so the first merged sample always replaces them.
+  void Clear() {
+    Stat1<VType, NumType>::Clear();
+    if (std::numeric_limits<VType>::has_infinity) {
+      min_ = std::numeric_limits<VType>::infinity();
+      max_ = -std::numeric_limits<VType>::infinity();
+    } else {
+      min_ = std::numeric_limits<VType>::max();
+      max_ = std::numeric_limits<VType>::min();
+    }
+  }
+  // Create a sample of value dat and weight 1
+  explicit Stat1MinMax(const VType &dat) : Stat1<VType, NumType>(dat) {
+    max_ = dat;
+    min_ = dat;
+  }
+  // Create statistics for all the samples between begin (included)
+  // and end(excluded)
+  explicit Stat1MinMax(const VType *begin, const VType *end) {
+    Clear();
+    for ( const VType *item = begin; item < end; ++item ) {
+      (*this) += Stat1MinMax(*item);
+    }
+  }
+  // Create a sample of value dat and weight w
+  Stat1MinMax(const VType &dat, const NumType &w)
+      : Stat1<VType, NumType>(dat, w) {
+    max_ = dat;
+    min_ = dat;
+  }
+  // Copy operator
+  Stat1MinMax(const Self &stat) : Stat1<VType, NumType>(stat) {
+    max_ = stat.max_;
+    min_ = stat.min_;
+  }
+  inline Self &operator =(const Self &stat) {
+    this->Stat1<VType, NumType>::operator=(stat);
+    max_ = stat.max_;
+    min_ = stat.min_;
+    return (*this);
+  }
+  // Merge statistics from two sample sets.
+  inline Self &operator +=(const Self &stat) {
+    this->Stat1<VType, NumType>::operator+=(stat);
+    if (stat.max_ > max_) max_ = stat.max_;
+    if (stat.min_ < min_) min_ = stat.min_;
+    return (*this);
+  }
+  // Multiply the weight of the set of samples by a factor k.
+  // NOTE(review): the parameter is the scale factor, not another stat;
+  // a name like 'k' would be clearer than 'stat'.
+  inline Self &operator *=(const VType &stat) {
+    this->Stat1<VType, NumType>::operator*=(stat);
+    return (*this);
+  }
+  // Merge statistics from two sample sets.
+  inline Self operator + (const Self &stat) const {
+    return Self(*this) += stat;
+  }
+  // Multiply the weight of the set of samples by a factor k
+  inline Self operator * (const VType &k) const {
+    return Self(*this) *= k;
+  }
+ private:
+  // The - operation makes no sense with Min/Max
+  // unless we keep the full list of values (but we don't)
+  // make it private, and let it undefined so nobody can call it
+  Self &operator -=(const Self &stat);  // senseless. let it undefined.
+
+  // The operation opposite to -
+  Self operator - (const Self &stat) const;  // senseless. let it undefined.
+
+ public:
+  // Return the maximal value in this sample set
+  VType Max() const {
+    return max_;
+  }
+  // Return the minimal value in this sample set
+  VType Min() const {
+    return min_;
+  }
+ private:
+  // Let i be the index of the samples provided (using +=)
+  // and weight[i],value[i] be the data of sample #i
+  // then the variables have the following meaning:
+  VType max_;  // max of value[i]
+  VType min_;  // min of value[i]
+};
+
+// Prints a Stat1MinMax as
+// "{ avg = A std = S nsamples = N min = L max = H}" for logs/debugging.
+template <typename VType, typename NumType>
+inline std::ostream& operator <<(std::ostream& out,
+                                 const Stat1MinMax<VType, NumType>& s) {
+  out << "{ avg = " << s.Mean()
+      << " std = " << s.StdDev()
+      << " nsamples = " << s.NumSamples()
+      << " min = " << s.Min()
+      << " max = " << s.Max() << "}";
+  return out;
+}
+
+#endif // BENCHMARK_STAT_H_
diff --git a/src/sysinfo.cc b/src/sysinfo.cc
new file mode 100644
index 0000000..644b66f
--- /dev/null
+++ b/src/sysinfo.cc
@@ -0,0 +1,337 @@
+#include "sysinfo.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <limits>
+
+#include "cycleclock.h"
+#include "macros.h"
+#include "mutex_lock.h"
+#include "sleep.h"
+
+namespace {
+// One-time guard for InitializeSystemInfo(), triggered via pthread_once in
+// CyclesPerSecond() and NumCPUs() below.
+pthread_once_t cpuinfo_init = PTHREAD_ONCE_INIT;
+double cpuinfo_cycles_per_second = 1.0; // conservative default until init
+int cpuinfo_num_cpus = 1; // Conservative guess
+// NOTE(review): this mutex is only pthread_mutex_init'd inside
+// InitializeSystemInfo(), but MyCPUUsage() locks it without triggering that
+// initialization — calling MyCPUUsage() before CyclesPerSecond()/NumCPUs()
+// locks an uninitialized mutex. Consider "= PTHREAD_MUTEX_INITIALIZER" here
+// (and dropping the pthread_mutex_init call) — TODO confirm call order.
+static pthread_mutex_t cputimens_mutex;
+
+// Helper function estimates cycles/sec by observing cycles elapsed during
+// sleep(). Using small sleep time decreases accuracy significantly.
+// Blocks the caller for estimate_time_ms milliseconds.
+int64_t EstimateCyclesPerSecond(const int estimate_time_ms) {
+ CHECK(estimate_time_ms > 0);
+ // Scale the observed cycle delta up to one full second's worth.
+ double multiplier = 1000.0 / (double)estimate_time_ms; // scale by this much
+
+ const int64_t start_ticks = CycleClock::Now();
+ SleepForMilliseconds(estimate_time_ms);
+ const int64_t guess = int64_t(multiplier * (CycleClock::Now() - start_ticks));
+ return guess;
+}
+
+// Helper function for reading an int from a file. Returns true if successful
+// and the memory location pointed to by value is set to the value read.
+// The file's leading characters must form an integer terminated by
+// end-of-input or a newline; anything else leaves *value untouched.
+bool ReadIntFromFile(const char *file, int *value) {
+ bool ret = false;
+ int fd = open(file, O_RDONLY);
+ if (fd != -1) {
+ char line[1024];
+ char* err;
+ memset(line, '\0', sizeof(line));
+ // read() returns -1 on error, which the previous CHECK(read(...))
+ // accepted because -1 is non-zero (and it aborted on an empty file,
+ // where read() returns 0). Capture the result and fail fast only on a
+ // genuine error; an empty file simply yields ret == false below.
+ const ssize_t bytes_read = read(fd, line, sizeof(line) - 1);
+ CHECK(bytes_read >= 0);
+ const int temp_value = strtol(line, &err, 10);
+ // Accept only a parse that consumed a non-empty prefix and stopped at
+ // end-of-string or a newline.
+ if (line[0] != '\0' && (*err == '\n' || *err == '\0')) {
+ *value = temp_value;
+ ret = true;
+ }
+ close(fd);
+ }
+ return ret;
+}
+
+// Populates cpuinfo_cycles_per_second and cpuinfo_num_cpus for the current
+// platform. Runs exactly once, via the pthread_once guard in
+// CyclesPerSecond()/NumCPUs(). May block for up to ~1s when it has to fall
+// back to EstimateCyclesPerSecond().
+void InitializeSystemInfo() {
+ bool saw_mhz = false;
+
+ // TODO: destroy this
+ // NOTE(review): MyCPUUsage() locks this mutex but never triggers this
+ // function — if it runs first, the mutex is used uninitialized. A static
+ // PTHREAD_MUTEX_INITIALIZER at the definition would be safer.
+ pthread_mutex_init(&cputimens_mutex, NULL);
+
+#if defined OS_LINUX || defined OS_CYGWIN
+ char line[1024];
+ char* err;
+ int freq;
+
+ // If the kernel is exporting the tsc frequency use that. There are issues
+ // where cpuinfo_max_freq cannot be relied on because the BIOS may be
+ // exporting an invalid p-state (on x86) or p-states may be used to put the
+ // processor in a new mode (turbo mode). Essentially, those frequencies
+ // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
+ // well.
+ if (!saw_mhz &&
+ ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
+ // The value is in kHz (as the file name suggests). For example, on a
+ // 2GHz warpstation, the file contains the value "2000000".
+ cpuinfo_cycles_per_second = freq * 1000.0;
+ saw_mhz = true;
+ }
+
+ // If CPU scaling is in effect, we want to use the *maximum* frequency,
+ // not whatever CPU speed some random processor happens to be using now.
+ if (!saw_mhz &&
+ ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
+ &freq)) {
+ // The value is in kHz. For example, on a 2GHz warpstation, the file
+ // contains the value "2000000".
+ cpuinfo_cycles_per_second = freq * 1000.0;
+ saw_mhz = true;
+ }
+
+ // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq.
+ const char* pname = "/proc/cpuinfo";
+ int fd = open(pname, O_RDONLY);
+ if (fd == -1) {
+ perror(pname);
+ if (!saw_mhz) {
+ cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
+ }
+ return; // TODO: use generic tester instead?
+ }
+
+ double bogo_clock = 1.0;
+ bool saw_bogo = false;
+ int max_cpu_id = 0;
+ int num_cpus = 0;
+ line[0] = line[1] = '\0';
+ int chars_read = 0;
+ do { // we'll exit when the last read didn't read anything
+ // Move the next line to the beginning of the buffer
+ const int oldlinelen = strlen(line);
+ if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line
+ line[0] = '\0';
+ else // still other lines left to save
+ memmove(line, line + oldlinelen+1, sizeof(line) - (oldlinelen+1));
+ // Terminate the new line, reading more if we can't find the newline
+ char* newline = strchr(line, '\n');
+ if (newline == NULL) {
+ const int linelen = strlen(line);
+ const int bytes_to_read = sizeof(line)-1 - linelen;
+ CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes
+ // NOTE(review): if read() fails it returns -1, making the store below
+ // write line[linelen - 1] — an out-of-bounds write when linelen == 0.
+ // The error case should be checked before indexing.
+ chars_read = read(fd, line + linelen, bytes_to_read);
+ line[linelen + chars_read] = '\0';
+ newline = strchr(line, '\n');
+ }
+ if (newline != NULL)
+ *newline = '\0';
+
+ // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
+ // accept positive values. Some environments (virtual machines) report zero,
+ // which would cause infinite looping in WallTime_Init.
+ if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) {
+ const char* freqstr = strchr(line, ':');
+ if (freqstr) {
+ cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0;
+ if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
+ saw_mhz = true;
+ }
+ } else if (strncasecmp(line, "bogomips", sizeof("bogomips")-1) == 0) {
+ const char* freqstr = strchr(line, ':');
+ if (freqstr) {
+ bogo_clock = strtod(freqstr+1, &err) * 1000000.0;
+ if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
+ saw_bogo = true;
+ }
+ } else if (strncasecmp(line, "processor", sizeof("processor")-1) == 0) {
+ num_cpus++; // count up every time we see a "processor :" entry
+ const char* freqstr = strchr(line, ':');
+ if (freqstr) {
+ const int cpu_id = strtol(freqstr+1, &err, 10);
+ if (freqstr[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id)
+ max_cpu_id = cpu_id;
+ }
+ }
+ } while (chars_read > 0);
+ close(fd);
+
+ if (!saw_mhz) {
+ if (saw_bogo) {
+ // If we didn't find anything better, we'll use bogomips, but
+ // we're not happy about it.
+ cpuinfo_cycles_per_second = bogo_clock;
+ } else {
+ // If we don't even have bogomips, we'll use the slow estimation.
+ cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
+ }
+ }
+ if (num_cpus == 0) {
+ fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n");
+ } else {
+ if ((max_cpu_id + 1) != num_cpus) {
+ fprintf(stderr,
+ "CPU ID assignments in /proc/cpuinfo seems messed up."
+ " This is usually caused by a bad BIOS.\n");
+ }
+ cpuinfo_num_cpus = num_cpus;
+ }
+
+#elif defined OS_FREEBSD
+ // For this sysctl to work, the machine must be configured without
+ // SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0
+ // and later. Before that, it's a 32-bit quantity (and gives the
+ // wrong answer on machines faster than 2^32 Hz). See
+ // http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
+ // But also compare FreeBSD 7.0:
+ // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
+ // 231 error = sysctl_handle_quad(oidp, &freq, 0, req);
+ // To FreeBSD 6.3 (it's the same in 6-STABLE):
+ // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
+ // 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
+ // NOTE(review): sysctlbyname() needs <sys/sysctl.h>, which this file does
+ // not include — presumably provided elsewhere on FreeBSD builds; confirm.
+#if __FreeBSD__ >= 7
+ uint64_t hz = 0;
+#else
+ unsigned int hz = 0;
+#endif
+ size_t sz = sizeof(hz);
+ const char *sysctl_path = "machdep.tsc_freq";
+ if ( sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0 ) {
+ fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
+ sysctl_path, strerror(errno));
+ cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
+ } else {
+ cpuinfo_cycles_per_second = hz;
+ }
+ // TODO: also figure out cpuinfo_num_cpus
+
+#elif defined OS_WINDOWS
+# pragma comment(lib, "shlwapi.lib") // for SHGetValue()
+ // In NT, read MHz from the registry. If we fail to do so or we're in win9x
+ // then make a crude estimate.
+ OSVERSIONINFO os;
+ os.dwOSVersionInfoSize = sizeof(os);
+ DWORD data, data_size = sizeof(data);
+ if (GetVersionEx(&os) &&
+ os.dwPlatformId == VER_PLATFORM_WIN32_NT &&
+ SUCCEEDED(SHGetValueA(HKEY_LOCAL_MACHINE,
+ "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
+ "~MHz", NULL, &data, &data_size)))
+ // NOTE(review): "int64" (not int64_t) — confirm this typedef exists on
+ // Windows builds; <windows.h>/<shlwapi.h> are also not included here.
+ cpuinfo_cycles_per_second = (int64)data * (int64)(1000 * 1000); // was mhz
+ else
+ cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500); // TODO <500?
+ // TODO: also figure out cpuinfo_num_cpus
+
+#elif defined OS_MACOSX
+ // returning "mach time units" per second. the current number of elapsed
+ // mach time units can be found by calling uint64 mach_absolute_time();
+ // while not as precise as actual CPU cycles, it is accurate in the face
+ // of CPU frequency scaling and multi-cpu/core machines.
+ // Our mac users have these types of machines, and accuracy
+ // (i.e. correctness) trumps precision.
+ // See cycleclock.h: CycleClock::Now(), which returns number of mach time
+ // units on Mac OS X.
+ // NOTE(review): mach_timebase_info/sysctl need <mach/mach_time.h> and
+ // <sys/sysctl.h>, neither included here — confirm another header provides
+ // them on Darwin.
+ mach_timebase_info_data_t timebase_info;
+ mach_timebase_info(&timebase_info);
+ double mach_time_units_per_nanosecond =
+ static_cast<double>(timebase_info.denom) /
+ static_cast<double>(timebase_info.numer);
+ cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9;
+
+ int num_cpus = 0;
+ size_t size = sizeof(num_cpus);
+ int numcpus_name[] = { CTL_HW, HW_NCPU };
+ if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, 0, 0)
+ == 0
+ && (size == sizeof(num_cpus)))
+ cpuinfo_num_cpus = num_cpus;
+
+#else
+ // Generic cycles per second counter
+ cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
+#endif
+}
+} // end namespace
+
+#ifndef OS_WINDOWS
+// getrusage() based implementation of MyCPUUsage.
+// Returns total process CPU time (user + system) in seconds, with the
+// microsecond fields folded in at 1e-6. Returns 0.0 if getrusage() fails.
+static double MyCPUUsageRUsage() {
+ struct rusage ru;
+ if (getrusage(RUSAGE_SELF, &ru) == 0) {
+ return (static_cast<double>(ru.ru_utime.tv_sec) +
+ static_cast<double>(ru.ru_utime.tv_usec)*1e-6 +
+ static_cast<double>(ru.ru_stime.tv_sec) +
+ static_cast<double>(ru.ru_stime.tv_usec)*1e-6);
+ } else {
+ return 0.0;
+ }
+}
+
+// Reads process CPU time (in seconds) from /proc/self/cputime_ns. Returns
+// false on any failure, closing and resetting the cached fd so a later call
+// retries from scratch. Caller must hold cputimens_mutex (hence the
+// "Locked" suffix): the static fd cache is shared state.
+static bool MyCPUUsageCPUTimeNsLocked(double *cputime) {
+ static int cputime_fd = -1;
+ if (cputime_fd == -1) {
+ cputime_fd = open("/proc/self/cputime_ns", O_RDONLY);
+ if (cputime_fd < 0) {
+ cputime_fd = -1; // normalize open()'s error return to the -1 sentinel
+ return false;
+ }
+ }
+ char buff[64];
+ memset(buff, 0, sizeof(buff));
+ // pread at offset 0 so the cached fd needs no lseek between calls.
+ if (pread(cputime_fd, buff, sizeof(buff)-1, 0) <= 0) {
+ close(cputime_fd);
+ cputime_fd = -1;
+ return false;
+ }
+ unsigned long long result = strtoull(buff, NULL, 0);
+ // strtoull returns ULLONG_MAX on overflow (ERANGE); treat as failure.
+ if (result == (std::numeric_limits<unsigned long long>::max)()) {
+ close(cputime_fd);
+ cputime_fd = -1;
+ return false;
+ }
+ // The file holds nanoseconds; convert to seconds.
+ *cputime = static_cast<double>(result) / 1e9;
+ return true;
+}
+
+// Process CPU time in seconds. Prefers the high-resolution
+// /proc/self/cputime_ns source; after that fails once, permanently falls
+// back to getrusage(). The mutex guards the helper's static fd cache and
+// the fallback flag, and is released before the getrusage() path runs.
+// NOTE(review): cputimens_mutex is only pthread_mutex_init'd inside
+// InitializeSystemInfo(), which this function never triggers — calling
+// MyCPUUsage() first locks an uninitialized mutex.
+double MyCPUUsage() {
+ {
+ mutex_lock l(&cputimens_mutex);
+ static bool use_cputime_ns = true;
+ if (use_cputime_ns) {
+ double value;
+ if (MyCPUUsageCPUTimeNsLocked(&value)) {
+ return value;
+ }
+ // Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage().
+ std::cout << "Reading /proc/self/cputime_ns failed. Using getrusage().\n";
+ use_cputime_ns = false;
+ }
+ }
+ return MyCPUUsageRUsage();
+}
+
+// CPU time (user + system, in seconds) consumed by terminated, waited-for
+// child processes. Returns 0.0 if getrusage() fails.
+double ChildrenCPUUsage() {
+ struct rusage ru;
+ if (getrusage(RUSAGE_CHILDREN, &ru) == 0) {
+ return (static_cast<double>(ru.ru_utime.tv_sec) +
+ static_cast<double>(ru.ru_utime.tv_usec)*1e-6 +
+ static_cast<double>(ru.ru_stime.tv_sec) +
+ static_cast<double>(ru.ru_stime.tv_usec)*1e-6);
+ } else {
+ return 0.0;
+ }
+}
+#endif // OS_WINDOWS
+
+// Cycles (or platform timebase units) per second; the underlying probe runs
+// exactly once via pthread_once and the cached value is returned thereafter.
+double CyclesPerSecond(void) {
+ pthread_once(&cpuinfo_init, &InitializeSystemInfo);
+ return cpuinfo_cycles_per_second;
+}
+
+// Number of logical CPUs detected; the underlying probe runs exactly once
+// via pthread_once. Defaults to 1 if detection fails.
+int NumCPUs(void) {
+ pthread_once(&cpuinfo_init, &InitializeSystemInfo);
+ return cpuinfo_num_cpus;
+}
+
diff --git a/src/sysinfo.h b/src/sysinfo.h
new file mode 100644
index 0000000..0b85d5c
--- /dev/null
+++ b/src/sysinfo.h
@@ -0,0 +1,9 @@
+#ifndef BENCHMARK_SYSINFO_H_
+#define BENCHMARK_SYSINFO_H_
+
+// Process CPU time in seconds (user + system); prefers
+// /proc/self/cputime_ns and falls back to getrusage().
+double MyCPUUsage();
+// CPU time in seconds consumed by terminated, waited-for children.
+double ChildrenCPUUsage();
+// Number of logical CPUs (best effort; at least 1).
+int NumCPUs();
+// Estimated CPU (or platform timebase) cycles per second.
+double CyclesPerSecond();
+
+#endif // BENCHMARK_SYSINFO_H_
diff --git a/src/walltime.cc b/src/walltime.cc
new file mode 100644
index 0000000..85384aa
--- /dev/null
+++ b/src/walltime.cc
@@ -0,0 +1,137 @@
+#include "walltime.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+
+#include <atomic>
+#include <limits>
+
+#include "cycleclock.h"
+#include "macros.h"
+#include "sysinfo.h"
+
+namespace walltime {
+namespace {
+// Maximum tolerated gap (in seconds) between the paired CycleClock and
+// gettimeofday samples used for calibration.
+const double kMaxErrorInterval = 100e-6;
+
+std::atomic<bool> initialized(false);
+WallTime base_walltime = 0.0; // gettimeofday() at calibration time
+int64_t base_cycletime = 0; // CycleClock::Now() at calibration time
+int64_t cycles_per_second;
+double seconds_per_cycle;
+uint32_t last_adjust_time = 0; // top 32 bits of cycles at last drift fix
+std::atomic<int32_t> drift_adjust(0); // float bits; see SetDrift/GetDrift
+int64_t max_interval_cycles = 0; // kMaxErrorInterval converted to cycles
+
+// Helper routines to load/store a float from an AtomicWord. Required because
+// g++ < 4.7 doesn't support std::atomic<float> correctly. I cannot wait to get
+// rid of this horror show.
+// The float's bit pattern is round-tripped through an int32_t via memcpy —
+// the well-defined way to type-pun here.
+inline void SetDrift(float f) {
+ int32_t w;
+ memcpy(&w, &f, sizeof(f));
+ std::atomic_store(&drift_adjust, w);
+}
+
+inline float GetDrift() {
+ float f;
+ int32_t w = std::atomic_load(&drift_adjust);
+ memcpy(&f, &w, sizeof(f));
+ return f;
+}
+
+// Guards the memcpy-based pun above: a float must fit in the int32_t slot.
+static_assert(sizeof(float) <= sizeof(int32_t),
+ "type sizes don't allow the drift_adjust hack");
+
+// Fallback/calibration clock: gettimeofday() expressed as seconds since the
+// epoch (double, ~microsecond resolution).
+WallTime Slow() {
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ return tv.tv_sec + tv.tv_usec * 1e-6;
+}
+
+// Splits a WallTime into calendar fields (*t) plus the fractional second
+// (*subsecond). "local" selects localtime_r vs gmtime_r. Returns false —
+// with *t zeroed and *subsecond set to 0.0 — for negative values or values
+// beyond time_t's range.
+// NOTE(review): the double comparison against time_t's max can be imprecise
+// at the extreme boundary due to rounding — confirm that is acceptable.
+bool SplitTimezone(WallTime value, bool local, struct tm* t,
+ double* subsecond) {
+ memset(t, 0, sizeof(*t));
+ if ((value < 0) || (value > std::numeric_limits<time_t>::max())) {
+ *subsecond = 0.0;
+ return false;
+ }
+ const time_t whole_time = static_cast<time_t>(value);
+ *subsecond = value - whole_time;
+ if (local)
+ localtime_r(&whole_time, t);
+ else
+ gmtime_r(&whole_time, t);
+ return true;
+}
+} // end namespace
+
+// This routine should be invoked to initialize walltime.
+// It is not intended for general purpose use.
+// Calibrates the (cycle counter, gettimeofday) base pair so Now() can
+// extrapolate cheaply afterwards.
+void Initialize() {
+ CHECK(!std::atomic_load(&initialized)); // must only ever run once
+ cycles_per_second = static_cast<int64_t>(CyclesPerSecond());
+ CHECK(cycles_per_second != 0);
+ seconds_per_cycle = 1.0 / cycles_per_second;
+ max_interval_cycles = static_cast<int64_t>(
+ cycles_per_second * kMaxErrorInterval);
+ // Keep sampling until a (cycle, walltime) pair is captured close enough
+ // together.
+ do {
+ base_cycletime = CycleClock::Now();
+ base_walltime = Slow();
+ } while (CycleClock::Now() - base_cycletime > max_interval_cycles);
+ // We are now sure that "base_walltime" and "base_cycletime" were produced
+ // within kMaxErrorInterval of one another.
+
+ SetDrift(0.0);
+ last_adjust_time = static_cast<uint32_t>(uint64_t(base_cycletime) >> 32);
+ std::atomic_store(&initialized, true);
+}
+
+// Current walltime. Fast path extrapolates from the cycle counter using the
+// calibrated base pair plus a cached drift correction; before Initialize()
+// it falls back to Slow(). Drift against gettimeofday() is re-measured at
+// most once per 2^32 cycles.
+// NOTE(review): base_*, seconds_per_cycle and last_adjust_time are read and
+// written without synchronization — fine while the library is
+// single-threaded, but needs attention for multithreaded use.
+WallTime Now() {
+ if (!std::atomic_load(&initialized))
+ return Slow();
+
+ WallTime now = 0.0;
+ WallTime result = 0.0;
+ int64_t ct = 0;
+ uint32_t top_bits = 0;
+ do {
+ ct = CycleClock::Now();
+ int64_t cycle_delta = ct - base_cycletime;
+ result = base_walltime + cycle_delta * seconds_per_cycle;
+
+ top_bits = static_cast<uint32_t>(uint64_t(ct) >> 32);
+ // Recompute drift no more often than every 2^32 cycles.
+ // I.e., @2GHz, ~ every two seconds
+ if (top_bits == last_adjust_time) { // don't need to recompute drift
+ return result + GetDrift();
+ }
+
+ now = Slow();
+ } while (CycleClock::Now() - ct > max_interval_cycles);
+ // We are now sure that "now" and "result" were produced within
+ // kMaxErrorInterval of one another.
+
+ SetDrift(now - result);
+ last_adjust_time = top_bits;
+ return now;
+}
+
+// Formats "time" into "storage" with strftime, returning storage. See the
+// contract documented in walltime.h.
+const char* Print(WallTime time, const char *format, bool local,
+ char* storage, int *remainder_us) {
+ struct tm split;
+ double subsecond;
+ if (!SplitTimezone(time, local, &split, &subsecond)) {
+ // BUG(review): storage is a char*, so sizeof(storage) is the size of a
+ // pointer (typically 8), not the caller's buffer — this message is
+ // truncated to 7 characters. A buffer-length parameter is needed.
+ snprintf(storage, sizeof(storage), "Invalid time: %f", time);
+ } else {
+ if (remainder_us != NULL) {
+ // Round the fractional second to microseconds and clamp to [0, 999999].
+ *remainder_us = static_cast<int>((subsecond * 1000000) + 0.5);
+ if (*remainder_us > 999999) *remainder_us = 999999;
+ if (*remainder_us < 0) *remainder_us = 0;
+ }
+ // BUG(review): same sizeof(storage) pointer-size issue — any formatted
+ // output longer than 7 bytes makes strftime return 0 and leaves
+ // storage's contents unspecified.
+ strftime(storage, sizeof(storage), format, &split);
+ }
+ return storage;
+}
+} // end namespace walltime
diff --git a/src/walltime.h b/src/walltime.h
new file mode 100644
index 0000000..13cda80
--- /dev/null
+++ b/src/walltime.h
@@ -0,0 +1,19 @@
+#ifndef BENCHMARK_WALLTIME_H_
+#define BENCHMARK_WALLTIME_H_
+
+// Walltime expressed as seconds (double) since the Unix epoch.
+typedef double WallTime;
+
+namespace walltime {
+// One-time calibration; call before relying on Now()'s fast path.
+void Initialize();
+// Current walltime in seconds.
+WallTime Now();
+
+// GIVEN: walltime, generic format string (as understood by strftime),
+// a boolean flag specifying if the time is local or UTC (true=local).
+// RETURNS: the formatted string. ALSO RETURNS: the storage printbuffer
+// passed and the remaining number of microseconds (never printed in
+// the string since strftime does not understand it)
+// NOTE(review): storage's capacity is not passed in, so the implementation
+// cannot correctly bound its writes — a size parameter is needed.
+const char* Print(WallTime time, const char *format, bool local,
+ char* storage, int *remainder_us);
+} // end namespace walltime
+
+#endif // BENCHMARK_WALLTIME_H_
diff --git a/test/benchmark_test.cc b/test/benchmark_test.cc
new file mode 100644
index 0000000..7c0c6d7
--- /dev/null
+++ b/test/benchmark_test.cc
@@ -0,0 +1,138 @@
+#include "benchmark/benchmark.h"
+
+#include <math.h>
+#include <stdint.h>
+
+#include <limits>
+#include <list>
+#include <map>
+#include <set>
+#include <sstream>
+#include <vector>
+
+namespace {
+
+// n! computed recursively; kept out-of-line so benchmark calls are real.
+// Base case is n <= 1: the previous test (n == 1) recursed forever for
+// n == 0 because the uint32_t argument wraps to UINT32_MAX on n - 1.
+// NOTE: the int result overflows for n > 12.
+int ATTRIBUTE_NOINLINE Factorial(uint32_t n) {
+ return (n <= 1) ? 1 : n * Factorial(n - 1);
+}
+
+// Approximates pi via a shifted Leibniz series: the i == 0 term contributes
+// exactly +1 ((-1)/(-1)), so the running sum converges to 1 + pi/4 and the
+// final (pi - 1.0) * 4 recovers pi. CalculatePi(1) is therefore exactly
+// 0.0 — relied on by the CHECK in main().
+double CalculatePi(int depth) {
+ double pi = 0.0;
+ for (int i = 0; i < depth; ++i) {
+ double numerator = static_cast<double>(((i % 2) * 2) - 1);
+ double denominator = static_cast<double>((2 * i) - 1);
+ pi += numerator / denominator;
+ }
+ return (pi - 1.0) * 4;
+}
+
+// NOTE(review): despite the name, this builds the deterministic set
+// {0, 1, ..., size-1}; no randomness is involved.
+std::set<int> ConstructRandomSet(int size) {
+ std::set<int> s;
+ for (int i = 0; i < size; ++i)
+ s.insert(i);
+ return s;
+}
+
+// Shared fixture for BM_SetupTeardown; created/destroyed by thread 0.
+static std::vector<int>* test_vector = NULL;
+
+} // end namespace
+
+#ifdef DEBUG
+// Debug-only sanity benchmark. NOTE(review): the variable is named fac_42
+// but the call computes Factorial(8).
+static void BM_Factorial(benchmark::State& state) {
+ int fac_42 = 0;
+ while (state.KeepRunning())
+ fac_42 = Factorial(8);
+ // Prevent compiler optimizations
+ CHECK(fac_42 != std::numeric_limits<int>::max());
+}
+BENCHMARK(BM_Factorial);
+#endif
+
+// Benchmarks CalculatePi over depths 1..1M (range_x); the last computed
+// approximation is reported through the run's label.
+static void BM_CalculatePiRange(benchmark::State& state) {
+ double pi = 0.0;
+ while (state.KeepRunning())
+ pi = CalculatePi(state.range_x());
+ std::stringstream ss;
+ ss << pi;
+ state.SetLabel(ss.str());
+}
+BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024);
+
+// Fixed-depth pi computation, registered three ways to exercise the
+// threading APIs: fixed 8 threads, a 1..32 thread range, and one thread
+// per CPU.
+static void BM_CalculatePi(benchmark::State& state) {
+ static const int depth = 1024;
+ double pi ATTRIBUTE_UNUSED = 0.0;
+ while (state.KeepRunning()) {
+ pi = CalculatePi(depth);
+ }
+}
+BENCHMARK(BM_CalculatePi)->Threads(8);
+BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32);
+BENCHMARK(BM_CalculatePi)->ThreadPerCpu();
+
+// Measures std::set insertion. Building the initial set (range_x elements)
+// is excluded from timing via PauseTiming/ResumeTiming; range_y inserts are
+// timed per iteration (note: the rand() calls are inside the timed region).
+static void BM_SetInsert(benchmark::State& state) {
+ while (state.KeepRunning()) {
+ state.PauseTiming();
+ std::set<int> data = ConstructRandomSet(state.range_x());
+ state.ResumeTiming();
+ for (int j = 0; j < state.range_y(); ++j)
+ data.insert(rand());
+ }
+}
+BENCHMARK(BM_SetInsert)->RangePair(1<<10,8<<10, 1,10);
+
+// Benchmarks sequential push_back into container Q, comparing contiguous
+// (vector) vs node-based (list) storage.
+// NOTE(review): "for (int i = state.range_x(); --i; )" performs
+// range_x() - 1 pushes per iteration (and would loop almost forever for
+// range_x() == 0), yet items_processed below is computed with range_x() —
+// off-by-one between work done and work reported. Also, v is
+// default-initialized (indeterminate for int) — confirm both are intended.
+template<typename Q>
+static void BM_Sequential(benchmark::State& state) {
+ Q q;
+ typename Q::value_type v;
+ while (state.KeepRunning())
+ for (int i = state.range_x(); --i; )
+ q.push_back(v);
+ const int64_t items_processed =
+ static_cast<int64_t>(state.iterations()) * state.range_x();
+ state.SetItemsProcessed(items_processed);
+ state.SetBytesProcessed(items_processed * sizeof(v));
+}
+BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>)->Range(1 << 0, 1 << 10);
+BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(1 << 0, 1 << 10);
+
+// Compares two identical strings of range_x() '-' characters; r accumulates
+// the results so the compare call cannot be optimized away.
+static void BM_StringCompare(benchmark::State& state) {
+ std::string s1(state.range_x(), '-');
+ std::string s2(state.range_x(), '-');
+ int r = 0;
+ while (state.KeepRunning())
+ r |= s1.compare(s2);
+ // Prevent compiler optimizations
+ CHECK(r != std::numeric_limits<int>::max());
+}
+BENCHMARK(BM_StringCompare)->Range(1, 1<<20);
+
+// Exercises per-benchmark setup/teardown: thread 0 allocates the shared
+// vector before the timing loop and frees it after.
+// NOTE(review): other threads would touch test_vector with no explicit
+// synchronization; presumably framework barriers around the timed section
+// make this safe — confirm once multithreaded support lands.
+static void BM_SetupTeardown(benchmark::State& state) {
+ if (state.thread_index == 0)
+ test_vector = new std::vector<int>();
+ while (state.KeepRunning())
+ test_vector->push_back(0);
+ if (state.thread_index == 0) {
+ delete test_vector;
+ test_vector = NULL;
+ }
+}
+BENCHMARK(BM_SetupTeardown);
+
+// Deliberately long-running inner loop so per-iteration framework overhead
+// is negligible. tracker sums 0..range_x()-1 each pass, so it is nonzero
+// whenever the loop body ran with range_x() > 1; the CHECK defeats
+// dead-code elimination.
+static void BM_LongTest(benchmark::State& state) {
+ double tracker = 0.0;
+ while (state.KeepRunning())
+ for (int i = 0; i < state.range_x(); ++i)
+ tracker += i;
+ CHECK(tracker != 0.0);
+}
+BENCHMARK(BM_LongTest)->Range(1<<16,1<<28);
+
+int main(int argc, const char* argv[]) {
+ benchmark::Initialize(&argc, argv);
+
+ // Sanity-check the helpers before running benchmarks. CalculatePi(1) is
+ // exactly 0.0 because the single i == 0 term contributes exactly 1.0.
+ CHECK(Factorial(8) == 40320);
+ CHECK(CalculatePi(1) == 0.0);
+
+ benchmark::RunSpecifiedBenchmarks();
+}
+