examples/src/main/java/examples/BitSetBenchmark.java - platform/external/caliper - Git at Google

 /*
  * Copyright (C) 2010 Google Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package examples;


 import com.google.caliper.BeforeExperiment;
 import com.google.caliper.Benchmark;

 import java.util.BitSet;
 import java.util.Random;

 /**
  * A simple example of a benchmark for BitSet showing some of the issues with
  * micro-benchmarking.
  *
  * <p>The following is a discussion of how the benchmarks evolved and what they
  * may (or may not) tell us. This discussion is based on the following set of
  * results:
  *
  * <p><pre>
  *  0% Scenario{vm=java, benchmark=SetBitSetX64} 233.45ns; σ=0.31ns @ 3 trials
  * 20% Scenario{vm=java, benchmark=SetMaskX64} 116.62ns; σ=0.09ns @ 3 trials
  * 40% Scenario{vm=java, benchmark=CharsToBitSet} 748.40ns; σ=23.52ns @ 10 trials
  * 60% Scenario{vm=java, benchmark=CharsToMask} 198.55ns; σ=9.46ns @ 10 trials
  * 80% Scenario{vm=java, benchmark=BaselineIteration} 67.85ns; σ=0.44ns @ 3 trials
  *
  *         benchmark   ns logarithmic runtime
  *      SetBitSetX64  233 XXXXXXXXX|||||||||||||||
  *        SetMaskX64  117 XXXX|||||||||||||||||
  *     CharsToBitSet  748 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
  *       CharsToMask  199 XXXXXXX||||||||||||||||
  * BaselineIteration   68 XX|||||||||||||||||
  * </pre>
  *
  * <p>Initially things look simple. The {@link #setBitSetX64(int)} benchmark
  * takes approximately twice as long as {@link #setMaskX64(int)}. However
  * the inner loops in these benchmarks have almost no content, so a more
  * 'real world' benchmark was devised in an attempt to back up these results.
  *
  * <p>The {@link #charsToMask(int)} and {@link #charsToBitSet(int)}
  * benchmarks convert a simple char[] of '1's and '0's to a corresponding BitSet
  * or bit mask. These also processes 64 bits per iteration and so appears to be
  * doing the same amount of work as the first benchmarks.
  *
  * <p>Additionally the {@link BitSetBenchmark#baselineIteration(int)}
  * benchmark attempts to measure the raw cost of looping through and reading the
  * source data.
  *
  * <p>When comparing the benchmarks that use bit masking, we see that the
  * measured time of the SetMaskX64 benchmark (117ns) is roughly the same
  * as the CharsToMask benchmark (199ns) with the BaselineIteration time (68ms)
  * subtracted from it. This gives us some confidence that both benchmarks are
  * resulting in the same underlying work on the CPU.
  *
  * <p>However the CharsToBitSet and the SetBitSetX64 benchmarks differ very
  * significantly (approximately 3x) even when accounting for the
  * BaselineIteration result. This suggests that the performance of
  * {@link BitSet#set} is quite dependent on the surrounding code and how
  * it is optimized by the JVM.
  *
  * <p>The conclusions we can draw from this are:
  *
  * <p><b>1:</b> Using BitSet is slower than using bit masks directly. At best it
  * seems about 2x slower than a bit mask, but could easily be 5x slower in real
  * applications.
  *
  * <p>While these are only estimates, we can conclude that when performance is
  * important and where bit set operations occur in tight loops, bit masks
  * should be used in favor of BitSets.
  *
  * <p><b>2:</b>Overly simplistic benchmarks can give a very false impression of
  * performance.
  */
 public class BitSetBenchmark {
   private BitSet bitSet;
   private char[] bitString;

   @BeforeExperiment void setUp() throws Exception {
     bitSet = new BitSet(64);
     bitString = new char[64];
     Random r = new Random();
     for (int n = 0; n < 64; n++) {
       bitString[n] = r.nextBoolean() ? '1' : '0';
     }
   }

   /**
    * This benchmark attempts to measure performance of {@link BitSet#set}.
    */
   @Benchmark int setBitSetX64(int reps) {
     long count = 64L * reps;
     for (int i = 0; i < count; i++) {
       bitSet.set(i & 0x3F, true);
     }
     return bitSet.hashCode();
   }

   /**
    * This benchmark attempts to measure performance of direct bit-manipulation.
    */
   @Benchmark long setMaskX64(int reps) {
     long count = 64L * reps;
     long bitMask = 0L;
     for (int i = 0; i < count; i++) {
       bitMask |= 1 << (i & 0x3F);
     }
     return bitMask;
   }

   /**
    * This benchmark parses a char[] of 1's and 0's into a BitSet. Results from
    * this benchmark should be comparable with those from
    * {@link #charsToMask(int)}.
    */
   @Benchmark String charsToBitSet(int reps) {
     /*
      * This benchmark now measures the complete parsing of a char[] rather than
      * a single invocation of {@link BitSet#set}. However this fine because
      * it is intended to be a comparative benchmark.
      */
     for (int i = 0; i < reps; i++) {
       for (int n = 0; n < bitString.length; n++) {
         bitSet.set(n, bitString[n] == '1');
       }
     }
     return bitSet.toString();
   }

   /**
    * This benchmark parses a char[] of 1's and 0's into a bit mask. Results from
    * this benchmark should be comparable with those from
    * {@link #charsToBitSet(int)}.
    */
   @Benchmark long charsToMask(int reps) {
     /*
      * Comparing results we see a far more realistic sounding result whereby
      * using a bit mask is a little over 4x faster than using BitSet.
      */
     long bitMask = 0;
     for (int i = 0; i < reps; i++) {
       for (int n = 0; n < bitString.length; n++) {
         long m = 1 << n;
         if (bitString[n] == '1') {
           bitMask |= m;
         } else {
           bitMask &= ~m;
         }
       }
     }
     return bitMask;
   }

   /**
    * This benchmark attempts to measure the baseline cost of both
    * {@link #charsToBitSet(int)} and {@link #charsToMask(int)}.
    * It does this by unconditionally summing the character values of the char[].
    * This is as close to a no-op case as we can expect to get without unwanted
    * over-optimization.
    */
   @Benchmark long baselineIteration(int reps) {
     int badHash = 0;
     for (int i = 0; i < reps; i++) {
       for (int n = 0; n < bitString.length; n++) {
         badHash += bitString[n];
       }
     }
     return badHash;
   }
 }
	/*
	* Copyright (C) 2010 Google Inc.
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package examples;


	import com.google.caliper.BeforeExperiment;
	import com.google.caliper.Benchmark;

	import java.util.BitSet;
	import java.util.Random;

	/**
	* A simple example of a benchmark for BitSet showing some of the issues with
	* micro-benchmarking.
	*
	* <p>The following is a discussion of how the benchmarks evolved and what they
	* may (or may not) tell us. This discussion is based on the following set of
	* results:
	*
	* <p><pre>
	* 0% Scenario{vm=java, benchmark=SetBitSetX64} 233.45ns; σ=0.31ns @ 3 trials
	* 20% Scenario{vm=java, benchmark=SetMaskX64} 116.62ns; σ=0.09ns @ 3 trials
	* 40% Scenario{vm=java, benchmark=CharsToBitSet} 748.40ns; σ=23.52ns @ 10 trials
	* 60% Scenario{vm=java, benchmark=CharsToMask} 198.55ns; σ=9.46ns @ 10 trials
	* 80% Scenario{vm=java, benchmark=BaselineIteration} 67.85ns; σ=0.44ns @ 3 trials
	*
	* benchmark ns logarithmic runtime
	* SetBitSetX64 233 XXXXXXXXX\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|
	* SetMaskX64 117 XXXX\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|
	* CharsToBitSet 748 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
	* CharsToMask 199 XXXXXXX\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|
	* BaselineIteration 68 XX\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|
	* </pre>
	*
	* <p>Initially things look simple. The {@link #setBitSetX64(int)} benchmark
	* takes approximately twice as long as {@link #setMaskX64(int)}. However
	* the inner loops in these benchmarks have almost no content, so a more
	* 'real world' benchmark was devised in an attempt to back up these results.
	*
	* <p>The {@link #charsToMask(int)} and {@link #charsToBitSet(int)}
	* benchmarks convert a simple char[] of '1's and '0's to a corresponding BitSet
	* or bit mask. These also processes 64 bits per iteration and so appears to be
	* doing the same amount of work as the first benchmarks.
	*
	* <p>Additionally the {@link BitSetBenchmark#baselineIteration(int)}
	* benchmark attempts to measure the raw cost of looping through and reading the
	* source data.
	*
	* <p>When comparing the benchmarks that use bit masking, we see that the
	* measured time of the SetMaskX64 benchmark (117ns) is roughly the same
	* as the CharsToMask benchmark (199ns) with the BaselineIteration time (68ms)
	* subtracted from it. This gives us some confidence that both benchmarks are
	* resulting in the same underlying work on the CPU.
	*
	* <p>However the CharsToBitSet and the SetBitSetX64 benchmarks differ very
	* significantly (approximately 3x) even when accounting for the
	* BaselineIteration result. This suggests that the performance of
	* {@link BitSet#set} is quite dependent on the surrounding code and how
	* it is optimized by the JVM.
	*
	* <p>The conclusions we can draw from this are:
	*
	* <p><b>1:</b> Using BitSet is slower than using bit masks directly. At best it
	* seems about 2x slower than a bit mask, but could easily be 5x slower in real
	* applications.
	*
	* <p>While these are only estimates, we can conclude that when performance is
	* important and where bit set operations occur in tight loops, bit masks
	* should be used in favor of BitSets.
	*
	* <p><b>2:</b>Overly simplistic benchmarks can give a very false impression of
	* performance.
	*/
	public class BitSetBenchmark {
	private BitSet bitSet;
	private char[] bitString;

	@BeforeExperiment void setUp() throws Exception {
	bitSet = new BitSet(64);
	bitString = new char[64];
	Random r = new Random();
	for (int n = 0; n < 64; n++) {
	bitString[n] = r.nextBoolean() ? '1' : '0';
	}
	}

	/**
	* This benchmark attempts to measure performance of {@link BitSet#set}.
	*/
	@Benchmark int setBitSetX64(int reps) {
	long count = 64L * reps;
	for (int i = 0; i < count; i++) {
	bitSet.set(i & 0x3F, true);
	}
	return bitSet.hashCode();
	}

	/**
	* This benchmark attempts to measure performance of direct bit-manipulation.
	*/
	@Benchmark long setMaskX64(int reps) {
	long count = 64L * reps;
	long bitMask = 0L;
	for (int i = 0; i < count; i++) {
	bitMask \|= 1 << (i & 0x3F);
	}
	return bitMask;
	}

	/**
	* This benchmark parses a char[] of 1's and 0's into a BitSet. Results from
	* this benchmark should be comparable with those from
	* {@link #charsToMask(int)}.
	*/
	@Benchmark String charsToBitSet(int reps) {
	/*
	* This benchmark now measures the complete parsing of a char[] rather than
	* a single invocation of {@link BitSet#set}. However this fine because
	* it is intended to be a comparative benchmark.
	*/
	for (int i = 0; i < reps; i++) {
	for (int n = 0; n < bitString.length; n++) {
	bitSet.set(n, bitString[n] == '1');
	}
	}
	return bitSet.toString();
	}

	/**
	* This benchmark parses a char[] of 1's and 0's into a bit mask. Results from
	* this benchmark should be comparable with those from
	* {@link #charsToBitSet(int)}.
	*/
	@Benchmark long charsToMask(int reps) {
	/*
	* Comparing results we see a far more realistic sounding result whereby
	* using a bit mask is a little over 4x faster than using BitSet.
	*/
	long bitMask = 0;
	for (int i = 0; i < reps; i++) {
	for (int n = 0; n < bitString.length; n++) {
	long m = 1 << n;
	if (bitString[n] == '1') {
	bitMask \|= m;
	} else {
	bitMask &= ~m;
	}
	}
	}
	return bitMask;
	}

	/**
	* This benchmark attempts to measure the baseline cost of both
	* {@link #charsToBitSet(int)} and {@link #charsToMask(int)}.
	* It does this by unconditionally summing the character values of the char[].
	* This is as close to a no-op case as we can expect to get without unwanted
	* over-optimization.
	*/
	@Benchmark long baselineIteration(int reps) {
	int badHash = 0;
	for (int i = 0; i < reps; i++) {
	for (int n = 0; n < bitString.length; n++) {
	badHash += bitString[n];
	}
	}
	return badHash;
	}
	}