blob: 7ec7220d8f9901fdfe5703630aa53668b71ca43a [file] [log] [blame]
/*
* Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.nio.file;
import sun.nio.cs.ISO_8859_1;
import sun.nio.cs.UTF_8;
import sun.nio.cs.US_ASCII;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.channels.ReadableByteChannel;
import java.nio.charset.Charset;
import java.util.HashSet;
import java.util.Set;
import java.util.Spliterator;
import java.util.function.Consumer;
/**
* A file-based lines spliterator, leveraging a shared mapped byte buffer and
* associated file channel, covering lines of a file for character encodings
* where line feed characters can be easily identified from character encoded
* bytes.
*
* <p>
* When the root spliterator is first split a mapped byte buffer will be created
* over the file for its size that was observed when the stream was created.
* Thus a mapped byte buffer is only required for parallel stream execution.
* Sub-spliterators will share that mapped byte buffer. Splitting will use the
* mapped byte buffer to find the closest line feed character(s) to the left or
* right of the mid-point of covered range of bytes of the file. If a line feed
* is found then the spliterator is split with returned spliterator containing
* the identified line feed character(s) at the end of its covered range of
* bytes.
*
* <p>
* Traversing will create a buffered reader, derived from the file channel, for
* the range of bytes of the file. The lines are then read from that buffered
* reader. Once traversing commences no further splitting can be performed and
* the reference to the mapped byte buffer will be set to null.
*/
final class FileChannelLinesSpliterator implements Spliterator<String> {

    /**
     * Names of the charsets this spliterator is registered for: UTF-8,
     * ISO-8859-1 and US-ASCII.
     */
    static final Set<String> SUPPORTED_CHARSET_NAMES;
    static {
        SUPPORTED_CHARSET_NAMES = new HashSet<>();
        SUPPORTED_CHARSET_NAMES.add(UTF_8.INSTANCE.name());
        SUPPORTED_CHARSET_NAMES.add(ISO_8859_1.INSTANCE.name());
        SUPPORTED_CHARSET_NAMES.add(US_ASCII.INSTANCE.name());
    }

    // Channel the bytes are read from (and mapped from, when splitting)
    private final FileChannel fc;

    // Charset used to decode the covered bytes into lines
    private final Charset cs;

    // Inclusive start of the covered byte range; moved forward by trySplit
    // and by every read performed while traversing
    private int index;

    // Exclusive end of the covered byte range
    private final int fence;

    // Null before first split, non-null when splitting, null when traversing
    private ByteBuffer buffer;

    // Non-null when traversing
    private BufferedReader reader;

    /**
     * Creates a spliterator covering the bytes {@code [index, fence)} of the
     * given file channel. No byte buffer is mapped until the first split.
     *
     * @param fc the file channel to read from
     * @param cs the charset used to decode the bytes
     * @param index the inclusive start of the covered range of bytes
     * @param fence the exclusive end of the covered range of bytes
     */
    FileChannelLinesSpliterator(FileChannel fc, Charset cs, int index, int fence) {
        this.fc = fc;
        this.cs = cs;
        this.index = index;
        this.fence = fence;
    }

    /**
     * Creates a sub-spliterator covering the bytes {@code [index, fence)}
     * that reuses an already mapped byte buffer for further splitting.
     *
     * @param fc the file channel to read from
     * @param cs the charset used to decode the bytes
     * @param index the inclusive start of the covered range of bytes
     * @param fence the exclusive end of the covered range of bytes
     * @param buffer the mapped byte buffer shared with the parent spliterator
     */
    private FileChannelLinesSpliterator(FileChannel fc, Charset cs, int index, int fence, ByteBuffer buffer) {
        this.fc = fc;
        this.buffer = buffer;
        this.cs = cs;
        this.index = index;
        this.fence = fence;
    }

    /**
     * Reads the next line, if any, and passes it to the given action.
     *
     * @param action the consumer of the line
     * @return {@code true} if a line was consumed, {@code false} if no lines
     *         remain
     * @throws NullPointerException if {@code action} is null
     * @throws UncheckedIOException if an I/O error occurs while reading
     */
    @Override
    public boolean tryAdvance(Consumer<? super String> action) {
        // Reject a null action before reading so that no line is consumed
        // and then lost
        if (action == null) {
            throw new NullPointerException();
        }
        String line = readLine();
        if (line != null) {
            action.accept(line);
            return true;
        } else {
            return false;
        }
    }

    /**
     * Reads all remaining lines, passing each one to the given action.
     *
     * @param action the consumer of the lines
     * @throws NullPointerException if {@code action} is null
     * @throws UncheckedIOException if an I/O error occurs while reading
     */
    @Override
    public void forEachRemaining(Consumer<? super String> action) {
        // Checked eagerly so the exception is thrown even if no lines remain
        if (action == null) {
            throw new NullPointerException();
        }
        String line;
        while ((line = readLine()) != null) {
            action.accept(line);
        }
    }

    /**
     * Creates a buffered reader that decodes, using this spliterator's
     * charset, only the bytes of the file channel in the range
     * {@code [index, fence)}. Each read advances {@code index}.
     */
    private BufferedReader getBufferedReader() {
        /*
         * A readable byte channel that reads bytes from an underlying
         * file channel over a specified range.
         */
        ReadableByteChannel rrbc = new ReadableByteChannel() {
            @Override
            public int read(ByteBuffer dst) throws IOException {
                int bytesToRead = fence - index;
                if (bytesToRead == 0) {
                    // The covered range is exhausted: report end-of-stream
                    return -1;
                }

                int bytesRead;
                if (bytesToRead < dst.remaining()) {
                    // The number of bytes to read is less than remaining
                    // bytes in the buffer
                    // Snapshot the limit, reduce it, read, then restore
                    int oldLimit = dst.limit();
                    dst.limit(dst.position() + bytesToRead);
                    try {
                        bytesRead = fc.read(dst, index);
                    } finally {
                        // Restore the caller's limit even if the read fails
                        dst.limit(oldLimit);
                    }
                } else {
                    bytesRead = fc.read(dst, index);
                }

                if (bytesRead == -1) {
                    // The channel ended before the fence was reached; mark
                    // the range as exhausted
                    index = fence;
                    return bytesRead;
                }

                index += bytesRead;
                return bytesRead;
            }

            @Override
            public boolean isOpen() {
                return fc.isOpen();
            }

            @Override
            public void close() throws IOException {
                fc.close();
            }
        };
        return new BufferedReader(Channels.newReader(rrbc, cs.newDecoder(), -1));
    }

    /**
     * Returns the next line of the covered range, or {@code null} if there
     * are no more lines. The first call switches this spliterator into the
     * traversing state: the reader is created and the reference to the
     * mapped byte buffer is dropped.
     *
     * @throws UncheckedIOException if an I/O error occurs while reading
     */
    private String readLine() {
        if (reader == null) {
            reader = getBufferedReader();
            buffer = null;
        }

        try {
            return reader.readLine();
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Maps the bytes {@code [0, fence)} of the file channel read-only.
     *
     * @throws UncheckedIOException if the mapping fails with an I/O error
     */
    private ByteBuffer getMappedByteBuffer() {
        // TODO can the mapped byte buffer be explicitly unmapped?
        // It's possible, via a shared-secret mechanism, when either
        // 1) the spliterator starts traversing, although traversal can
        //    happen concurrently for multiple spliterators, so care is
        //    needed in this case; or
        // 2) when the stream is closed using some shared holder to pass
        //    the mapped byte buffer when it is created.
        try {
            return fc.map(FileChannel.MapMode.READ_ONLY, 0, fence);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Attempts to split the covered range of bytes at the line separator
     * closest to its mid-point. On success the returned spliterator covers
     * the left part, ending with the line separator, and this spliterator
     * keeps the right part.
     *
     * @return a spliterator covering the left part, or {@code null} if
     *         traversal has started, the range is shorter than two bytes,
     *         or no suitable line separator was found
     * @throws UncheckedIOException if mapping the file fails
     */
    @Override
    public Spliterator<String> trySplit() {
        // Cannot split after partial traverse
        if (reader != null) {
            return null;
        }

        final int hi = fence, lo = index;

        // Both halves of a split must be non-empty, so a range of fewer
        // than two bytes can never be split. Bail out before mapping the
        // file, and before probing the mid-point, which would be out of
        // bounds for an empty range.
        if (hi - lo < 2) {
            return null;
        }

        ByteBuffer b;
        if ((b = buffer) == null) {
            b = buffer = getMappedByteBuffer();
        }

        // Check if line separator hits the mid point
        int mid = (lo + hi) >>> 1;
        int c = b.get(mid);
        if (c == '\n') {
            mid++;
        } else if (c == '\r') {
            // Check if a line separator of "\r\n"
            if (++mid < hi && b.get(mid) == '\n') {
                mid++;
            }
        } else {
            // TODO give up after a certain distance from the mid point?
            // Scan to the left and right of the mid point
            int midL = mid - 1;
            int midR = mid + 1;
            // Zero means "no separator found"; it fails the mid > lo test
            mid = 0;
            while (midL > lo && midR < hi) {
                // Sample to the left
                c = b.get(midL--);
                if (c == '\n' || c == '\r') {
                    // If c is "\r" then no need to check for "\r\n"
                    // since the subsequent value was previously checked
                    mid = midL + 2;
                    break;
                }

                // Sample to the right
                c = b.get(midR++);
                if (c == '\n' || c == '\r') {
                    mid = midR;
                    // Check if line-separator is "\r\n"
                    if (c == '\r' && mid < hi && b.get(mid) == '\n') {
                        mid++;
                    }
                    break;
                }
            }
        }

        // The left spliterator will have the line-separator at the end
        return (mid > lo && mid < hi)
                ? new FileChannelLinesSpliterator(fc, cs, lo, index = mid, b)
                : null;
    }

    /**
     * Returns the number of bytes that remain in the covered range, used as
     * a stand-in for the number of lines.
     */
    @Override
    public long estimateSize() {
        // Use the number of bytes as an estimate.
        // We could divide by a constant that is the average number of
        // characters per-line, but that constant will be factored out.
        return fence - index;
    }

    /**
     * Always returns {@code -1}: this spliterator never reports an exact
     * size.
     */
    @Override
    public long getExactSizeIfKnown() {
        return -1;
    }

    /**
     * Reports {@link Spliterator#ORDERED} and {@link Spliterator#NONNULL}.
     */
    @Override
    public int characteristics() {
        return Spliterator.ORDERED | Spliterator.NONNULL;
    }
}