guava-tests/benchmark/com/google/common/io/ByteSourceAsCharSourceReadBenchmark.java - platform/external/guava - Git at Google

 /*
  * Copyright (C) 2017 The Guava Authors
  *
  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
  * in compliance with the License. You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software distributed under the License
  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
  * or implied. See the License for the specific language governing permissions and limitations under
  * the License.
  */

 package com.google.common.io;

 import com.google.caliper.BeforeExperiment;
 import com.google.caliper.Benchmark;
 import com.google.caliper.Param;
 import com.google.caliper.api.VmOptions;
 import com.google.common.base.Optional;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.nio.charset.Charset;
 import java.util.Random;

 /**
  * Benchmarks for various potential implementations of {@code ByteSource.asCharSource(...).read()}.
  *
  * <p>Each {@link ReadStrategy} decodes the entire contents of a {@link ByteSource} into a {@code
  * String}; {@link #timeCopy(int)} runs the selected strategy repeatedly over data built in {@link
  * #setUp()}.
  */
 // These benchmarks allocate a lot of data so use a large heap
 @VmOptions({"-Xms12g", "-Xmx12g", "-d64"})
 public class ByteSourceAsCharSourceReadBenchmark {
   /** The candidate implementations being compared. */
   enum ReadStrategy {
     /** Reads all bytes into an array and decodes them with {@code new String(byte[], Charset)}. */
     TO_BYTE_ARRAY_NEW_STRING {
       @Override
       String read(ByteSource byteSource, Charset cs) throws IOException {
         return new String(byteSource.read(), cs);
       }
     },
     /** Decodes through an {@link InputStreamReader} into an unsized {@link StringBuilder}. */
     USING_CHARSTREAMS_COPY {
       @Override
       String read(ByteSource byteSource, Charset cs) throws IOException {
         StringBuilder sb = new StringBuilder();
         try (InputStreamReader reader = new InputStreamReader(byteSource.openStream(), cs)) {
           CharStreams.copy(reader, sb);
         }
         return sb.toString();
       }
     },
     // It really seems like this should be faster than TO_BYTE_ARRAY_NEW_STRING.  But it just isn't.
     // My best guess is that the jdk authors have spent more time optimizing that callpath than this
     // one (StringCoding$StringDecoder vs. StreamDecoder).  StringCoding has a ton of special cases;
     // theoretically we could duplicate all that logic here to try to beat 'new String' or at least
     // come close.
     /**
      * Decodes through an {@link InputStreamReader} into a {@code char[]} presized from {@code
      * sizeIfKnown()} and the decoder's {@code maxCharsPerByte()}.
      */
     USING_DECODER_WITH_SIZE_HINT {
       @Override
       String read(ByteSource byteSource, Charset cs) throws IOException {
         Optional<Long> size = byteSource.sizeIfKnown();
         // Without a known size that fits in an int there is nothing to presize with.
         if (!size.isPresent() || size.get().longValue() != size.get().intValue()) {
           return TO_BYTE_ARRAY_NEW_STRING.read(byteSource, cs);
         }
         // Presize a char[] to the largest number of chars the bytes could decode to.
         // It is kind of lame that we need to construct a decoder to access this value.
         // If this is a concern we could add special cases for some known charsets (like utf8)
         // or we could avoid InputStreamReader and use the decoder api directly.
         // TODO(lukes): in a real implementation we would need to handle overflow conditions
         int maxChars = (int) (size.get().intValue() * cs.newDecoder().maxCharsPerByte());
         char[] buffer = new char[maxChars];
         int bufIndex = 0;
         int remaining = buffer.length;
         try (InputStreamReader reader = new InputStreamReader(byteSource.openStream(), cs)) {
           int nRead = 0;
           while (remaining > 0 && (nRead = reader.read(buffer, bufIndex, remaining)) != -1) {
             bufIndex += nRead;
             remaining -= nRead;
           }
           if (nRead == -1) {
             // we reached EOF before filling the buffer
             return new String(buffer, 0, bufIndex);
           }
           // The buffer is full (or zero-length), so the loop stopped without ever observing EOF.
           // When the decoded text fits exactly (e.g. ASCII data in a one-char-per-byte charset)
           // the stream is already exhausted; probe a single char so that this common case still
           // returns straight from the buffer instead of paying for the fallback copy.
           int next = reader.read();
           if (next == -1) {
             return new String(buffer, 0, bufIndex);
           }
           // Otherwise we got the size wrong.  This can happen if the size changes between when
           // we called sizeIfKnown and when we started reading the file (or i guess if
           // maxCharsPerByte is wrong).
           // Fallback to an incremental approach, keeping the char consumed by the probe.
           StringBuilder builder = new StringBuilder(bufIndex + 32);
           builder.append(buffer, 0, bufIndex);
           builder.append((char) next);
           buffer = null; // release for gc
           CharStreams.copy(reader, builder);
           return builder.toString();
         }
       }
     };

     /**
      * Decodes the full contents of {@code byteSource} using {@code cs}.
      *
      * @param byteSource the bytes to decode
      * @param cs the charset used for decoding
      * @return the decoded text
      * @throws IOException if reading from {@code byteSource} fails
      */
     abstract String read(ByteSource byteSource, Charset cs) throws IOException;
   }

   /** Name of the charset used to encode the test data and to decode it again. */
   @Param({"UTF-8"})
   String charsetName;

   /** The implementation under test. */
   @Param ReadStrategy strategy;

   /** Number of characters in the generated test string. */
   @Param({"10", "1024", "1048576"})
   int size;

   /** Charset resolved from {@link #charsetName} in {@link #setUp()}. */
   Charset charset;

   /** The encoded test data, built in {@link #setUp()}. */
   ByteSource data;

   /** Resolves the charset and builds {@link #size} pseudo-random ASCII chars of test data. */
   @BeforeExperiment
   public void setUp() {
     charset = Charset.forName(charsetName);
     StringBuilder sb = new StringBuilder();
     Random random = new Random(0xdeadbeef); // for unpredictable but reproducible behavior
     sb.ensureCapacity(size);
     for (int k = 0; k < size; k++) {
       // [9, 127) covers every printable ascii character plus the control characters from tab (9)
       // through unit separator (31); DEL (127) is excluded
       sb.append((char) (random.nextInt(127 - 9) + 9));
     }
     data = ByteSource.wrap(sb.toString().getBytes(charset));
   }

   /**
    * Runs the selected strategy {@code reps} times over the prepared data.
    *
    * @param reps number of times to decode the data
    * @return the sum of the decoded strings' hash codes, so that every result is consumed
    * @throws IOException if a read fails
    */
   @Benchmark
   public int timeCopy(int reps) throws IOException {
     int r = 0;
     // Copy the fields into locals before entering the timed loop.
     final Charset localCharset = charset;
     final ByteSource localData = data;
     final ReadStrategy localStrategy = strategy;
     for (int i = 0; i < reps; i++) {
       r += localStrategy.read(localData, localCharset).hashCode();
     }
     return r;
   }
 }
	/*
	* Copyright (C) 2017 The Guava Authors
	*
	* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
	* in compliance with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software distributed under the License
	* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
	* or implied. See the License for the specific language governing permissions and limitations under
	* the License.
	*/

	package com.google.common.io;

	import com.google.caliper.BeforeExperiment;
	import com.google.caliper.Benchmark;
	import com.google.caliper.Param;
	import com.google.caliper.api.VmOptions;
	import com.google.common.base.Optional;
	import java.io.IOException;
	import java.io.InputStreamReader;
	import java.nio.charset.Charset;
	import java.util.Random;

	/**
	 * Benchmarks for various potential implementations of {@code ByteSource.asCharSource(...).read()}.
	 *
	 * <p>Each {@link ReadStrategy} decodes the entire contents of a {@link ByteSource} into a {@code
	 * String}; {@link #timeCopy(int)} runs the selected strategy repeatedly over data built in {@link
	 * #setUp()}.
	 */
	// These benchmarks allocate a lot of data so use a large heap
	@VmOptions({"-Xms12g", "-Xmx12g", "-d64"})
	public class ByteSourceAsCharSourceReadBenchmark {
	  /** The candidate implementations being compared. */
	  enum ReadStrategy {
	    /** Reads all bytes into an array and decodes them with {@code new String(byte[], Charset)}. */
	    TO_BYTE_ARRAY_NEW_STRING {
	      @Override
	      String read(ByteSource byteSource, Charset cs) throws IOException {
	        return new String(byteSource.read(), cs);
	      }
	    },
	    /** Decodes through an {@link InputStreamReader} into an unsized {@link StringBuilder}. */
	    USING_CHARSTREAMS_COPY {
	      @Override
	      String read(ByteSource byteSource, Charset cs) throws IOException {
	        StringBuilder sb = new StringBuilder();
	        try (InputStreamReader reader = new InputStreamReader(byteSource.openStream(), cs)) {
	          CharStreams.copy(reader, sb);
	        }
	        return sb.toString();
	      }
	    },
	    // It really seems like this should be faster than TO_BYTE_ARRAY_NEW_STRING. But it just isn't.
	    // My best guess is that the jdk authors have spent more time optimizing that callpath than this
	    // one (StringCoding$StringDecoder vs. StreamDecoder). StringCoding has a ton of special cases;
	    // theoretically we could duplicate all that logic here to try to beat 'new String' or at least
	    // come close.
	    /**
	     * Decodes through an {@link InputStreamReader} into a {@code char[]} presized from {@code
	     * sizeIfKnown()} and the decoder's {@code maxCharsPerByte()}.
	     */
	    USING_DECODER_WITH_SIZE_HINT {
	      @Override
	      String read(ByteSource byteSource, Charset cs) throws IOException {
	        Optional<Long> size = byteSource.sizeIfKnown();
	        // Without a known size that fits in an int there is nothing to presize with.
	        if (!size.isPresent() || size.get().longValue() != size.get().intValue()) {
	          return TO_BYTE_ARRAY_NEW_STRING.read(byteSource, cs);
	        }
	        // Presize a char[] to the largest number of chars the bytes could decode to.
	        // It is kind of lame that we need to construct a decoder to access this value.
	        // If this is a concern we could add special cases for some known charsets (like utf8)
	        // or we could avoid InputStreamReader and use the decoder api directly.
	        // TODO(lukes): in a real implementation we would need to handle overflow conditions
	        int maxChars = (int) (size.get().intValue() * cs.newDecoder().maxCharsPerByte());
	        char[] buffer = new char[maxChars];
	        int bufIndex = 0;
	        int remaining = buffer.length;
	        try (InputStreamReader reader = new InputStreamReader(byteSource.openStream(), cs)) {
	          int nRead = 0;
	          while (remaining > 0 && (nRead = reader.read(buffer, bufIndex, remaining)) != -1) {
	            bufIndex += nRead;
	            remaining -= nRead;
	          }
	          if (nRead == -1) {
	            // we reached EOF before filling the buffer
	            return new String(buffer, 0, bufIndex);
	          }
	          // The buffer is full (or zero-length), so the loop stopped without ever observing EOF.
	          // When the decoded text fits exactly (e.g. ASCII data in a one-char-per-byte charset)
	          // the stream is already exhausted; probe a single char so that this common case still
	          // returns straight from the buffer instead of paying for the fallback copy.
	          int next = reader.read();
	          if (next == -1) {
	            return new String(buffer, 0, bufIndex);
	          }
	          // Otherwise we got the size wrong. This can happen if the size changes between when
	          // we called sizeIfKnown and when we started reading the file (or i guess if
	          // maxCharsPerByte is wrong).
	          // Fallback to an incremental approach, keeping the char consumed by the probe.
	          StringBuilder builder = new StringBuilder(bufIndex + 32);
	          builder.append(buffer, 0, bufIndex);
	          builder.append((char) next);
	          buffer = null; // release for gc
	          CharStreams.copy(reader, builder);
	          return builder.toString();
	        }
	      }
	    };

	    /**
	     * Decodes the full contents of {@code byteSource} using {@code cs}.
	     *
	     * @param byteSource the bytes to decode
	     * @param cs the charset used for decoding
	     * @return the decoded text
	     * @throws IOException if reading from {@code byteSource} fails
	     */
	    abstract String read(ByteSource byteSource, Charset cs) throws IOException;
	  }

	  /** Name of the charset used to encode the test data and to decode it again. */
	  @Param({"UTF-8"})
	  String charsetName;

	  /** The implementation under test. */
	  @Param ReadStrategy strategy;

	  /** Number of characters in the generated test string. */
	  @Param({"10", "1024", "1048576"})
	  int size;

	  /** Charset resolved from {@link #charsetName} in {@link #setUp()}. */
	  Charset charset;

	  /** The encoded test data, built in {@link #setUp()}. */
	  ByteSource data;

	  /** Resolves the charset and builds {@link #size} pseudo-random ASCII chars of test data. */
	  @BeforeExperiment
	  public void setUp() {
	    charset = Charset.forName(charsetName);
	    StringBuilder sb = new StringBuilder();
	    Random random = new Random(0xdeadbeef); // for unpredictable but reproducible behavior
	    sb.ensureCapacity(size);
	    for (int k = 0; k < size; k++) {
	      // [9, 127) covers every printable ascii character plus the control characters from tab (9)
	      // through unit separator (31); DEL (127) is excluded
	      sb.append((char) (random.nextInt(127 - 9) + 9));
	    }
	    data = ByteSource.wrap(sb.toString().getBytes(charset));
	  }

	  /**
	   * Runs the selected strategy {@code reps} times over the prepared data.
	   *
	   * @param reps number of times to decode the data
	   * @return the sum of the decoded strings' hash codes, so that every result is consumed
	   * @throws IOException if a read fails
	   */
	  @Benchmark
	  public int timeCopy(int reps) throws IOException {
	    int r = 0;
	    // Copy the fields into locals before entering the timed loop.
	    final Charset localCharset = charset;
	    final ByteSource localData = data;
	    final ReadStrategy localStrategy = strategy;
	    for (int i = 0; i < reps; i++) {
	      r += localStrategy.read(localData, localCharset).hashCode();
	    }
	    return r;
	  }
	}