examples/src/main/java/examples/Utf8Benchmark.java - platform/external/caliper - Git at Google

 /*
  * Copyright (C) 2011 Google Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package examples;

 import com.google.caliper.BeforeExperiment;
 import com.google.caliper.Benchmark;
 import com.google.caliper.Param;

 import java.nio.charset.Charset;
 import java.util.Random;

 /**
  * Benchmark for operations with the UTF-8 charset.
  */
 public class Utf8Benchmark {

   static final Charset UTF_8 = Charset.forName("UTF-8");

   /**
    * The maximum code point used in generated text.  Different values
    * provide reasonable models of different real-world human text.
    *
    * <p>The stored {@link #value} is used as the <em>exclusive</em> upper
    * bound of the random code points drawn in {@code setUp}.
    */
   static class MaxCodePoint {
     /** Exclusive upper bound for generated code points. */
     final int value;

     /**
      * Convert the input string to a code point.  Accepts regular
      * decimal numerals, hex strings, and some symbolic names
      * meaningful to humans.
      *
      * @param userFriendly a numeral understood by {@link Integer#decode}
      *     or one of the symbolic names (matched case-insensitively)
      * @return the exclusive upper bound on generated code points
      * @throws IllegalArgumentException if the text is neither a known
      *     symbolic name nor a numeral in the range
      *     {@code (0, Character.MAX_CODE_POINT + 1]}
      */
     private static int decode(String userFriendly) {
       int numeric;
       try {
         numeric = Integer.decode(userFriendly);
       } catch (NumberFormatException ignored) {
         // Not a numeral; fall back to the human-friendly names.
         return decodeSymbolicName(userFriendly);
       }
       // Fail fast with a clear message: a non-positive bound would make
       // Random.nextInt throw later, and a bound past MAX_CODE_POINT + 1
       // could yield values that StringBuilder.appendCodePoint rejects.
       if (numeric <= 0 || numeric > Character.MAX_CODE_POINT + 1) {
         throw new IllegalArgumentException(
             "Max code point must be in (0, 0x110000]: " + userFriendly);
       }
       return numeric;
     }

     /**
      * Maps a symbolic name to its code point bound.
      *
      * @throws IllegalArgumentException if the name is not recognized
      */
     private static int decodeSymbolicName(String userFriendly) {
       if (userFriendly.matches("(?i)(?:American|English|ASCII)")) {
         // 1-byte UTF-8 sequences - "American" ASCII text
         return 0x80;
       } else if (userFriendly.matches("(?i)(?:French|Latin|Western.*European)")) {
         // Mostly 1-byte UTF-8 sequences, mixed with occasional 2-byte
         // sequences - "Western European" text
         return 0x90;
       } else if (userFriendly.matches("(?i)(?:Branch.*Prediction.*Hostile)")) {
         // Defeat branch predictor for: c < 0x80 ; branch taken 50% of the time.
         return 0x100;
       } else if (userFriendly.matches("(?i)(?:Greek|Cyrillic|European|ISO.?8859)")) {
         // Mostly 2-byte UTF-8 sequences - "European" text
         return 0x800;
       } else if (userFriendly.matches("(?i)(?:Chinese|Han|Asian|BMP)")) {
         // Mostly 3-byte UTF-8 sequences - "Asian" text
         return Character.MIN_SUPPLEMENTARY_CODE_POINT;
       } else if (userFriendly.matches("(?i)(?:Cuneiform|rare|exotic|supplementary.*)")) {
         // Mostly 4-byte UTF-8 sequences - "rare exotic" text
         return Character.MAX_CODE_POINT;
       } else {
         throw new IllegalArgumentException("Can't decode codepoint " + userFriendly);
       }
     }

     /**
      * Static factory taking the textual form of the parameter.
      * NOTE(review): presumably located by Caliper when converting
      * {@code @Param} strings - confirm against the Caliper docs.
      *
      * @param userFriendly numeral or symbolic name, see {@link #decode}
      * @throws IllegalArgumentException if the text cannot be decoded
      */
     public static MaxCodePoint valueOf(String userFriendly) {
       return new MaxCodePoint(userFriendly);
     }

     private MaxCodePoint(String userFriendly) {
       value = decode(userFriendly);
     }
   }

   /**
    * The default values of maxCodePoint below provide pretty good
    * performance models of different kinds of common human text.
    * @see MaxCodePoint#decode
    */
   @Param({"0x80", "0x100", "0x800", "0x10000", "0x10ffff"}) MaxCodePoint maxCodePoint;

   /**
    * Number of distinct strings generated.  A power of two, so that
    * {@code i & (STRING_COUNT - 1)} cycles through every index.
    */
   static final int STRING_COUNT = 1 << 7;

   /** Number of code points appended to each generated string. */
   @Param({"65536"}) int charCount;

   /** Strings built by {@link #setUp} and encoded by {@link #getBytes}. */
   private String[] strings;

   /**
    * Computes arrays of valid unicode Strings.  Each string holds
    * {@code charCount} pseudo-random code points below
    * {@code maxCodePoint.value}, with surrogate values skipped.
    */
   @BeforeExperiment void setUp() {
     final long seed = 99;
     final Random rnd = new Random(seed);
     strings = new String[STRING_COUNT];
     for (int i = 0; i < STRING_COUNT; i++) {
       StringBuilder sb = new StringBuilder();
       for (int j = 0; j < charCount; j++) {
         int codePoint;
         // discard illegal surrogate "codepoints"
         do {
           codePoint = rnd.nextInt(maxCodePoint.value);
         } while (isSurrogate(codePoint));
         sb.appendCodePoint(codePoint);
       }
       strings[i] = sb.toString();
     }
     // The reps will continue until the non-determinism detector is pacified!
     getBytes(100);
   }

   /**
    * Benchmarks {@link String#getBytes} on valid strings containing
    * pseudo-randomly-generated codePoints less than {@code
    * maxCodePoint}.  A constant seed is used, so separate runs perform
    * identical computations.
    *
    * @param reps number of strings to encode; the prepared strings are
    *     reused cyclically
    */
   @Benchmark void getBytes(int reps) {
     final String[] strings = this.strings;
     final int mask = STRING_COUNT - 1;
     for (int i = 0; i < reps; i++) {
       String string = strings[i & mask];
       byte[] bytes = string.getBytes(UTF_8);
       // The length guard keeps an empty string (charCount == 0) from
       // triggering ArrayIndexOutOfBoundsException on bytes[0].
       if (bytes.length > 0 && bytes[0] == 86 && bytes[bytes.length - 1] == 99) {
         throw new Error("Unlikely! We're just defeating the optimizer!");
       }
     }
   }

   /** Character.isSurrogate was added in Java SE 7. */
   private static boolean isSurrogate(int c) {
     return (Character.MIN_HIGH_SURROGATE <= c &&
             c <= Character.MAX_LOW_SURROGATE);
   }
 }
	/*
	* Copyright (C) 2011 Google Inc.
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package examples;

	import com.google.caliper.BeforeExperiment;
	import com.google.caliper.Benchmark;
	import com.google.caliper.Param;

	import java.nio.charset.Charset;
	import java.util.Random;

	/**
	* Benchmark for operations with the UTF-8 charset.
	*/
	public class Utf8Benchmark {

	static final Charset UTF_8 = Charset.forName("UTF-8");

	/**
	* The maximum code point used in generated text. Different values
	* provide reasonable models of different real-world human text.
	*/
	static class MaxCodePoint {
	final int value;

	/**
	* Convert the input string to a code point. Accepts regular
	* decimal numerals, hex strings, and some symbolic names
	* meaningful to humans.
	*/
	private static int decode(String userFriendly) {
	try {
	return Integer.decode(userFriendly);
	} catch (NumberFormatException ignored) {
	if (userFriendly.matches("(?i)(?:American\|English\|ASCII)")) {
	// 1-byte UTF-8 sequences - "American" ASCII text
	return 0x80;
	} else if (userFriendly.matches("(?i)(?:French\|Latin\|Western.*European)")) {
	// Mostly 1-byte UTF-8 sequences, mixed with occasional 2-byte
	// sequences - "Western European" text
	return 0x90;
	} else if (userFriendly.matches("(?i)(?:Branch.Prediction.Hostile)")) {
	// Defeat branch predictor for: c < 0x80 ; branch taken 50% of the time.
	return 0x100;
	} else if (userFriendly.matches("(?i)(?:Greek\|Cyrillic\|European\|ISO.?8859)")) {
	// Mostly 2-byte UTF-8 sequences - "European" text
	return 0x800;
	} else if (userFriendly.matches("(?i)(?:Chinese\|Han\|Asian\|BMP)")) {
	// Mostly 3-byte UTF-8 sequences - "Asian" text
	return Character.MIN_SUPPLEMENTARY_CODE_POINT;
	} else if (userFriendly.matches("(?i)(?:Cuneiform\|rare\|exotic\|supplementary.*)")) {
	// Mostly 4-byte UTF-8 sequences - "rare exotic" text
	return Character.MAX_CODE_POINT;
	} else {
	throw new IllegalArgumentException("Can't decode codepoint " + userFriendly);
	}
	}
	}

	public static MaxCodePoint valueOf(String userFriendly) {
	return new MaxCodePoint(userFriendly);
	}

	private MaxCodePoint(String userFriendly) {
	value = decode(userFriendly);
	}
	}

	/**
	* The default values of maxCodePoint below provide pretty good
	* performance models of different kinds of common human text.
	* @see MaxCodePoint#decode
	*/
	@Param({"0x80", "0x100", "0x800", "0x10000", "0x10ffff"}) MaxCodePoint maxCodePoint;

	static final int STRING_COUNT = 1 << 7;

	@Param({"65536"}) int charCount;
	private String[] strings;

	/**
	* Computes arrays of valid unicode Strings.
	*/
	@BeforeExperiment void setUp() {
	final long seed = 99;
	final Random rnd = new Random(seed);
	strings = new String[STRING_COUNT];
	for (int i = 0; i < STRING_COUNT; i++) {
	StringBuilder sb = new StringBuilder();
	for (int j = 0; j < charCount; j++) {
	int codePoint;
	// discard illegal surrogate "codepoints"
	do {
	codePoint = rnd.nextInt(maxCodePoint.value);
	} while (isSurrogate(codePoint));
	sb.appendCodePoint(codePoint);
	}
	strings[i] = sb.toString();
	}
	// The reps will continue until the non-determinism detector is pacified!
	getBytes(100);
	}

	/**
	* Benchmarks {@link String#getBytes} on valid strings containing
	* pseudo-randomly-generated codePoints less than {@code
	* maxCodePoint}. A constant seed is used, so separate runs perform
	* identical computations.
	*/
	@Benchmark void getBytes(int reps) {
	final String[] strings = this.strings;
	final int mask = STRING_COUNT - 1;
	for (int i = 0; i < reps; i++) {
	String string = strings[i & mask];
	byte[] bytes = string.getBytes(UTF_8);
	if (bytes[0] == 86 && bytes[bytes.length - 1] == 99) {
	throw new Error("Unlikely! We're just defeating the optimizer!");
	}
	}
	}

	/** Character.isSurrogate was added in Java SE 7. */
	private boolean isSurrogate(int c) {
	return (Character.MIN_HIGH_SURROGATE <= c &&
	c <= Character.MAX_LOW_SURROGATE);
	}
	}