| /* |
| * Copyright (C) 2012 The Guava Authors |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| * in compliance with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software distributed under the License |
| * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
| * or implied. See the License for the specific language governing permissions and limitations under |
| * the License. |
| */ |
| |
| package com.google.common.io; |
| |
| import static com.google.common.base.Preconditions.checkNotNull; |
| |
| import com.google.common.annotations.Beta; |
| import com.google.common.annotations.GwtIncompatible; |
| import com.google.common.base.Ascii; |
| import com.google.common.base.Optional; |
| import com.google.common.base.Splitter; |
| import com.google.common.collect.AbstractIterator; |
| import com.google.common.collect.ImmutableList; |
| import com.google.common.collect.Lists; |
| import com.google.errorprone.annotations.CanIgnoreReturnValue; |
| import java.io.BufferedReader; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.Reader; |
| import java.io.StringReader; |
| import java.io.Writer; |
| import java.nio.charset.Charset; |
| import java.util.Iterator; |
| import java.util.List; |
| import org.checkerframework.checker.nullness.compatqual.NullableDecl; |
| |
| /** |
| * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code |
| * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead, |
| * it is an immutable <i>supplier</i> of {@code Reader} instances. |
| * |
| * <p>{@code CharSource} provides two kinds of methods: |
| * |
| * <ul> |
| * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent |
| * instance each time they are called. The caller is responsible for ensuring that the |
| * returned reader is closed. |
| * <li><b>Convenience methods:</b> These are implementations of common operations that are |
| * typically implemented by opening a reader using one of the methods in the first category, |
| * doing something and finally closing the reader that was opened. |
| * </ul> |
| * |
| * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source |
| * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code |
| * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to |
| * be an empty line at the end if the contents are terminated with a line separator. |
| * |
| * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character |
| * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}. |
| * |
| * @since 14.0 |
| * @author Colin Decker |
| */ |
| @GwtIncompatible |
| public abstract class CharSource { |
| |
| /** Constructor for use by subclasses. */ |
| protected CharSource() {} |
| |
| /** |
| * Returns a {@link ByteSource} view of this char source that encodes chars read from this source |
| * as bytes using the given {@link Charset}. |
| * |
| * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset, |
| * the default implementation of this method will ensure that the original {@code CharSource} is |
| * returned, rather than round-trip encoding. Subclasses that override this method should behave |
| * the same way. |
| * |
| * @since 20.0 |
| */ |
| @Beta |
| public ByteSource asByteSource(Charset charset) { |
| return new AsByteSource(charset); |
| } |
| |
| /** |
| * Opens a new {@link Reader} for reading from this source. This method returns a new, independent |
| * reader each time it is called. |
| * |
| * <p>The caller is responsible for ensuring that the returned reader is closed. |
| * |
| * @throws IOException if an I/O error occurs while opening the reader |
| */ |
| public abstract Reader openStream() throws IOException; |
| |
| /** |
| * Opens a new {@link BufferedReader} for reading from this source. This method returns a new, |
| * independent reader each time it is called. |
| * |
| * <p>The caller is responsible for ensuring that the returned reader is closed. |
| * |
| * @throws IOException if an I/O error occurs while of opening the reader |
| */ |
| public BufferedReader openBufferedStream() throws IOException { |
| Reader reader = openStream(); |
| return (reader instanceof BufferedReader) |
| ? (BufferedReader) reader |
| : new BufferedReader(reader); |
| } |
| |
| /** |
| * Returns the size of this source in chars, if the size can be easily determined without actually |
| * opening the data stream. |
| * |
| * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a {@code |
| * CharSequence}, may return a non-absent value. Note that in such cases, it is <i>possible</i> |
| * that this method will return a different number of chars than would be returned by reading all |
| * of the chars. |
| * |
| * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may |
| * return a different number of chars if the contents are changed. |
| * |
| * @since 19.0 |
| */ |
| @Beta |
| public Optional<Long> lengthIfKnown() { |
| return Optional.absent(); |
| } |
| |
| /** |
| * Returns the length of this source in chars, even if doing so requires opening and traversing an |
| * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}. |
| * |
| * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If |
| * absent, it will fall back to a heavyweight operation that will open a stream, {@link |
| * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that |
| * were skipped. |
| * |
| * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient |
| * implementation, it is <i>possible</i> that this method will return a different number of chars |
| * than would be returned by reading all of the chars. |
| * |
| * <p>In either case, for mutable sources such as files, a subsequent read may return a different |
| * number of chars if the contents are changed. |
| * |
| * @throws IOException if an I/O error occurs while reading the length of this source |
| * @since 19.0 |
| */ |
| @Beta |
| public long length() throws IOException { |
| Optional<Long> lengthIfKnown = lengthIfKnown(); |
| if (lengthIfKnown.isPresent()) { |
| return lengthIfKnown.get(); |
| } |
| |
| Closer closer = Closer.create(); |
| try { |
| Reader reader = closer.register(openStream()); |
| return countBySkipping(reader); |
| } catch (Throwable e) { |
| throw closer.rethrow(e); |
| } finally { |
| closer.close(); |
| } |
| } |
| |
| private long countBySkipping(Reader reader) throws IOException { |
| long count = 0; |
| long read; |
| while ((read = reader.skip(Long.MAX_VALUE)) != 0) { |
| count += read; |
| } |
| return count; |
| } |
| |
| /** |
| * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}). |
| * Does not close {@code appendable} if it is {@code Closeable}. |
| * |
| * @return the number of characters copied |
| * @throws IOException if an I/O error occurs while reading from this source or writing to {@code |
| * appendable} |
| */ |
| @CanIgnoreReturnValue |
| public long copyTo(Appendable appendable) throws IOException { |
| checkNotNull(appendable); |
| |
| Closer closer = Closer.create(); |
| try { |
| Reader reader = closer.register(openStream()); |
| return CharStreams.copy(reader, appendable); |
| } catch (Throwable e) { |
| throw closer.rethrow(e); |
| } finally { |
| closer.close(); |
| } |
| } |
| |
| /** |
| * Copies the contents of this source to the given sink. |
| * |
| * @return the number of characters copied |
| * @throws IOException if an I/O error occurs while reading from this source or writing to {@code |
| * sink} |
| */ |
| @CanIgnoreReturnValue |
| public long copyTo(CharSink sink) throws IOException { |
| checkNotNull(sink); |
| |
| Closer closer = Closer.create(); |
| try { |
| Reader reader = closer.register(openStream()); |
| Writer writer = closer.register(sink.openStream()); |
| return CharStreams.copy(reader, writer); |
| } catch (Throwable e) { |
| throw closer.rethrow(e); |
| } finally { |
| closer.close(); |
| } |
| } |
| |
| /** |
| * Reads the contents of this source as a string. |
| * |
| * @throws IOException if an I/O error occurs while reading from this source |
| */ |
| public String read() throws IOException { |
| Closer closer = Closer.create(); |
| try { |
| Reader reader = closer.register(openStream()); |
| return CharStreams.toString(reader); |
| } catch (Throwable e) { |
| throw closer.rethrow(e); |
| } finally { |
| closer.close(); |
| } |
| } |
| |
| /** |
| * Reads the first line of this source as a string. Returns {@code null} if this source is empty. |
| * |
| * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of |
| * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code |
| * \n}. If the source's content does not end in a line termination sequence, it is treated as if |
| * it does. |
| * |
| * @throws IOException if an I/O error occurs while reading from this source |
| */ |
| @NullableDecl |
| public String readFirstLine() throws IOException { |
| Closer closer = Closer.create(); |
| try { |
| BufferedReader reader = closer.register(openBufferedStream()); |
| return reader.readLine(); |
| } catch (Throwable e) { |
| throw closer.rethrow(e); |
| } finally { |
| closer.close(); |
| } |
| } |
| |
| /** |
| * Reads all the lines of this source as a list of strings. The returned list will be empty if |
| * this source is empty. |
| * |
| * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of |
| * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code |
| * \n}. If the source's content does not end in a line termination sequence, it is treated as if |
| * it does. |
| * |
| * @throws IOException if an I/O error occurs while reading from this source |
| */ |
| public ImmutableList<String> readLines() throws IOException { |
| Closer closer = Closer.create(); |
| try { |
| BufferedReader reader = closer.register(openBufferedStream()); |
| List<String> result = Lists.newArrayList(); |
| String line; |
| while ((line = reader.readLine()) != null) { |
| result.add(line); |
| } |
| return ImmutableList.copyOf(result); |
| } catch (Throwable e) { |
| throw closer.rethrow(e); |
| } finally { |
| closer.close(); |
| } |
| } |
| |
| /** |
| * Reads lines of text from this source, processing each line as it is read using the given {@link |
| * LineProcessor processor}. Stops when all lines have been processed or the processor returns |
| * {@code false} and returns the result produced by the processor. |
| * |
| * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of |
| * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code |
| * \n}. If the source's content does not end in a line termination sequence, it is treated as if |
| * it does. |
| * |
| * @throws IOException if an I/O error occurs while reading from this source or if {@code |
| * processor} throws an {@code IOException} |
| * @since 16.0 |
| */ |
| @Beta |
| @CanIgnoreReturnValue // some processors won't return a useful result |
| public <T> T readLines(LineProcessor<T> processor) throws IOException { |
| checkNotNull(processor); |
| |
| Closer closer = Closer.create(); |
| try { |
| Reader reader = closer.register(openStream()); |
| return CharStreams.readLines(reader, processor); |
| } catch (Throwable e) { |
| throw closer.rethrow(e); |
| } finally { |
| closer.close(); |
| } |
| } |
| |
| /** |
| * Returns whether the source has zero chars. The default implementation first checks {@link |
| * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be |
| * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF. |
| * |
| * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that |
| * chars are actually available for reading. This means that a source may return {@code true} from |
| * {@code isEmpty()} despite having readable content. |
| * |
| * @throws IOException if an I/O error occurs |
| * @since 15.0 |
| */ |
| public boolean isEmpty() throws IOException { |
| Optional<Long> lengthIfKnown = lengthIfKnown(); |
| if (lengthIfKnown.isPresent()) { |
| return lengthIfKnown.get() == 0L; |
| } |
| Closer closer = Closer.create(); |
| try { |
| Reader reader = closer.register(openStream()); |
| return reader.read() == -1; |
| } catch (Throwable e) { |
| throw closer.rethrow(e); |
| } finally { |
| closer.close(); |
| } |
| } |
| |
| /** |
| * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from |
| * the source will contain the concatenated data from the streams of the underlying sources. |
| * |
| * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will |
| * close the open underlying stream. |
| * |
| * @param sources the sources to concatenate |
| * @return a {@code CharSource} containing the concatenated data |
| * @since 15.0 |
| */ |
| public static CharSource concat(Iterable<? extends CharSource> sources) { |
| return new ConcatenatedCharSource(sources); |
| } |
| |
| /** |
| * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from |
| * the source will contain the concatenated data from the streams of the underlying sources. |
| * |
| * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will |
| * close the open underlying stream. |
| * |
| * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method |
| * is called. This will fail if the iterator is infinite and may cause problems if the iterator |
| * eagerly fetches data for each source when iterated (rather than producing sources that only |
| * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if |
| * possible. |
| * |
| * @param sources the sources to concatenate |
| * @return a {@code CharSource} containing the concatenated data |
| * @throws NullPointerException if any of {@code sources} is {@code null} |
| * @since 15.0 |
| */ |
| public static CharSource concat(Iterator<? extends CharSource> sources) { |
| return concat(ImmutableList.copyOf(sources)); |
| } |
| |
| /** |
| * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from |
| * the source will contain the concatenated data from the streams of the underlying sources. |
| * |
| * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will |
| * close the open underlying stream. |
| * |
| * @param sources the sources to concatenate |
| * @return a {@code CharSource} containing the concatenated data |
| * @throws NullPointerException if any of {@code sources} is {@code null} |
| * @since 15.0 |
| */ |
| public static CharSource concat(CharSource... sources) { |
| return concat(ImmutableList.copyOf(sources)); |
| } |
| |
| /** |
| * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the |
| * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if |
| * the {@code charSequence} is mutated while it is being read, so don't do that. |
| * |
| * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)}) |
| */ |
| public static CharSource wrap(CharSequence charSequence) { |
| return charSequence instanceof String |
| ? new StringCharSource((String) charSequence) |
| : new CharSequenceCharSource(charSequence); |
| } |
| |
| /** |
| * Returns an immutable {@link CharSource} that contains no characters. |
| * |
| * @since 15.0 |
| */ |
| public static CharSource empty() { |
| return EmptyCharSource.INSTANCE; |
| } |
| |
| /** A byte source that reads chars from this source and encodes them as bytes using a charset. */ |
| private final class AsByteSource extends ByteSource { |
| |
| final Charset charset; |
| |
| AsByteSource(Charset charset) { |
| this.charset = checkNotNull(charset); |
| } |
| |
| @Override |
| public CharSource asCharSource(Charset charset) { |
| if (charset.equals(this.charset)) { |
| return CharSource.this; |
| } |
| return super.asCharSource(charset); |
| } |
| |
| @Override |
| public InputStream openStream() throws IOException { |
| return new ReaderInputStream(CharSource.this.openStream(), charset, 8192); |
| } |
| |
| @Override |
| public String toString() { |
| return CharSource.this.toString() + ".asByteSource(" + charset + ")"; |
| } |
| } |
| |
| private static class CharSequenceCharSource extends CharSource { |
| |
| private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r"); |
| |
| protected final CharSequence seq; |
| |
| protected CharSequenceCharSource(CharSequence seq) { |
| this.seq = checkNotNull(seq); |
| } |
| |
| @Override |
| public Reader openStream() { |
| return new CharSequenceReader(seq); |
| } |
| |
| @Override |
| public String read() { |
| return seq.toString(); |
| } |
| |
| @Override |
| public boolean isEmpty() { |
| return seq.length() == 0; |
| } |
| |
| @Override |
| public long length() { |
| return seq.length(); |
| } |
| |
| @Override |
| public Optional<Long> lengthIfKnown() { |
| return Optional.of((long) seq.length()); |
| } |
| |
| /** |
| * Returns an iterator over the lines in the string. If the string ends in a newline, a final |
| * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine(). |
| */ |
| private Iterator<String> linesIterator() { |
| return new AbstractIterator<String>() { |
| Iterator<String> lines = LINE_SPLITTER.split(seq).iterator(); |
| |
| @Override |
| protected String computeNext() { |
| if (lines.hasNext()) { |
| String next = lines.next(); |
| // skip last line if it's empty |
| if (lines.hasNext() || !next.isEmpty()) { |
| return next; |
| } |
| } |
| return endOfData(); |
| } |
| }; |
| } |
| |
| @Override |
| public String readFirstLine() { |
| Iterator<String> lines = linesIterator(); |
| return lines.hasNext() ? lines.next() : null; |
| } |
| |
| @Override |
| public ImmutableList<String> readLines() { |
| return ImmutableList.copyOf(linesIterator()); |
| } |
| |
| @Override |
| public <T> T readLines(LineProcessor<T> processor) throws IOException { |
| Iterator<String> lines = linesIterator(); |
| while (lines.hasNext()) { |
| if (!processor.processLine(lines.next())) { |
| break; |
| } |
| } |
| return processor.getResult(); |
| } |
| |
| @Override |
| public String toString() { |
| return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")"; |
| } |
| } |
| |
| /** |
| * Subclass specialized for string instances. |
| * |
| * <p>Since Strings are immutable and built into the jdk we can optimize some operations |
| * |
| * <ul> |
| * <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can |
| * use {@link String#getChars(int, int, char[], int)} instead of copying characters one by |
| * one with {@link CharSequence#charAt(int)}. |
| * <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link |
| * #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length |
| * can't change, and it is faster because many writers and appendables are optimized for |
| * appending string instances. |
| * </ul> |
| */ |
| private static class StringCharSource extends CharSequenceCharSource { |
| protected StringCharSource(String seq) { |
| super(seq); |
| } |
| |
| @Override |
| public Reader openStream() { |
| return new StringReader((String) seq); |
| } |
| |
| @Override |
| public long copyTo(Appendable appendable) throws IOException { |
| appendable.append(seq); |
| return seq.length(); |
| } |
| |
| @Override |
| public long copyTo(CharSink sink) throws IOException { |
| checkNotNull(sink); |
| Closer closer = Closer.create(); |
| try { |
| Writer writer = closer.register(sink.openStream()); |
| writer.write((String) seq); |
| return seq.length(); |
| } catch (Throwable e) { |
| throw closer.rethrow(e); |
| } finally { |
| closer.close(); |
| } |
| } |
| } |
| |
| private static final class EmptyCharSource extends StringCharSource { |
| |
| private static final EmptyCharSource INSTANCE = new EmptyCharSource(); |
| |
| private EmptyCharSource() { |
| super(""); |
| } |
| |
| @Override |
| public String toString() { |
| return "CharSource.empty()"; |
| } |
| } |
| |
| private static final class ConcatenatedCharSource extends CharSource { |
| |
| private final Iterable<? extends CharSource> sources; |
| |
| ConcatenatedCharSource(Iterable<? extends CharSource> sources) { |
| this.sources = checkNotNull(sources); |
| } |
| |
| @Override |
| public Reader openStream() throws IOException { |
| return new MultiReader(sources.iterator()); |
| } |
| |
| @Override |
| public boolean isEmpty() throws IOException { |
| for (CharSource source : sources) { |
| if (!source.isEmpty()) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| @Override |
| public Optional<Long> lengthIfKnown() { |
| long result = 0L; |
| for (CharSource source : sources) { |
| Optional<Long> lengthIfKnown = source.lengthIfKnown(); |
| if (!lengthIfKnown.isPresent()) { |
| return Optional.absent(); |
| } |
| result += lengthIfKnown.get(); |
| } |
| return Optional.of(result); |
| } |
| |
| @Override |
| public long length() throws IOException { |
| long result = 0L; |
| for (CharSource source : sources) { |
| result += source.length(); |
| } |
| return result; |
| } |
| |
| @Override |
| public String toString() { |
| return "CharSource.concat(" + sources + ")"; |
| } |
| } |
| } |