// blob: 78cd261306fc02589934c833b2cbdea6f46d6d35 [file] [log] [blame]
/*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.powermodel;
import java.io.InputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
/**
 * Parses CSV.
 * <p>
 * Call {@link #parse} with an InputStream.
 * <p>
 * {@link LineProcessor#onLine} will be called for each non-blank line in the
 * source document.
 * <p>
 * Field bytes are decoded as UTF-8.
 * <p>
 * To simplify parsing and to protect against using too much memory for bad
 * data, the maximum field length is {@link #MAX_FIELD_SIZE}.
 */
class CsvParser {
    /**
     * The maximum size of a single field in bytes.
     * <p>
     * The parse buffer is one byte larger than this; pending data that fills the
     * whole buffer without reaching a field or line delimiter is rejected.
     */
    public static final int MAX_FIELD_SIZE = (8*1024)-1;

    /**
     * Callback interface for each line of CSV as it is parsed.
     */
    interface LineProcessor {
        /**
         * A line of CSV was parsed.
         *
         * @param lineNumber the line number in the file, starting at 1
         * @param fields the comma separated fields for the line
         */
        void onLine(int lineNumber, ArrayList<String> fields) throws ParseException;
    }

    /**
     * Parse the CSV text in input, calling onto processor for each row.
     * <p>
     * Dialect notes, as implemented below:
     * <ul>
     * <li>Lines end at CR, LF or CRLF. Blank lines are not reported, but they
     *     still advance the line number.</li>
     * <li>A field whose first byte is {@code "} is quoted: inside it {@code ""}
     *     yields a literal quote and commas are ordinary data until a quote is
     *     immediately followed by a comma. A lone quote followed by any other
     *     byte is dropped.</li>
     * <li>Quotes do not protect CR/LF; a line break always ends the line.</li>
     * </ul>
     * This method never closes {@code input}.
     *
     * @param input the stream of CSV bytes to read until end of stream
     * @param processor receives the fields of each non-blank line
     * @throws IOException if reading from {@code input} fails
     * @throws ParseException if the pending data fills the whole buffer without
     *         a delimiter, or if {@code processor} throws it
     */
    public static void parse(InputStream input, LineProcessor processor)
            throws IOException, ParseException {
        // All field bytes are decoded explicitly as UTF-8 so the result does not
        // depend on the platform default charset.
        final Charset utf8 = StandardCharsets.UTF_8;
        final byte[] buf = new byte[MAX_FIELD_SIZE+1];
        int lineNumber = 1;
        // Number of bytes carried over from the previous pass (start of a partial field).
        int readPos = 0;
        // Previously examined byte; used to recognize the LF of a CRLF pair.
        int prev = 0;
        ArrayList<String> fields = new ArrayList<String>();
        boolean finalBuffer = false;
        // True while inside a field that started with a quote.
        boolean escaping = false;
        // True when the previous byte of a quoted field was a (so far unpaired) quote.
        boolean sawQuote = false;

        while (!finalBuffer) {
            int amt = input.read(buf, readPos, buf.length-readPos);
            if (amt < 0) {
                // No more data. Process whatever's left from before.
                amt = readPos;
                finalBuffer = true;
            } else {
                // Process whatever's left from before, plus the new data.
                amt += readPos;
                finalBuffer = false;
            }

            // Process as much of this buffer as we can.
            int fieldStart = 0;
            int index = readPos;
            // Write cursor for the unescaped content of a quoted field. The content
            // is compacted in place, so it always occupies [fieldStart, escapeIndex).
            int escapeIndex = escaping ? readPos : -1;
            while (index < amt) {
                byte c = buf[index];
                if (c == '\r' || c == '\n') {
                    if (escaping) {
                        // TODO: Quotes do not escape newlines in our CSV dialect,
                        // but we actually see some data where it should.
                        fields.add(new String(buf, fieldStart, escapeIndex-fieldStart, utf8));
                        escapeIndex = -1;
                        escaping = false;
                        sawQuote = false;
                    } else {
                        fields.add(new String(buf, fieldStart, index-fieldStart, utf8));
                    }
                    // Don't report blank lines
                    if (hasContent(fields)) {
                        processor.onLine(lineNumber, fields);
                    }
                    fields = new ArrayList<String>();
                    if (!(c == '\n' && prev == '\r')) {
                        // Don't double increment for dos line endings.
                        lineNumber++;
                    }
                    fieldStart = index = index + 1;
                } else {
                    if (escaping) {
                        // Field started with a " so quotes are escaped with " and commas
                        // don't matter except when following a single quote.
                        if (c == '"') {
                            if (sawQuote) {
                                // Second quote of a "" pair: emit one literal quote.
                                buf[escapeIndex] = buf[index];
                                escapeIndex++;
                                sawQuote = false;
                            } else {
                                sawQuote = true;
                            }
                            index++;
                        } else if (sawQuote && c == ',') {
                            // Closing quote followed by a comma ends the quoted field.
                            fields.add(new String(buf, fieldStart, escapeIndex-fieldStart, utf8));
                            fieldStart = index = index + 1;
                            escapeIndex = -1;
                            escaping = false;
                            sawQuote = false;
                        } else {
                            // Ordinary data byte; any pending lone quote is discarded.
                            buf[escapeIndex] = buf[index];
                            escapeIndex++;
                            index++;
                            sawQuote = false;
                        }
                    } else {
                        if (c == ',') {
                            fields.add(new String(buf, fieldStart, index-fieldStart, utf8));
                            fieldStart = index + 1;
                        } else if (c == '"' && fieldStart == index) {
                            // First character is a "
                            escaping = true;
                            fieldStart = escapeIndex = index + 1;
                        }
                        index++;
                    }
                }
                prev = c;
            }

            // A single field is greater than buf.length, so fail.
            if (fieldStart == 0 && index == buf.length) {
                throw new ParseException(lineNumber, "Line is too long: "
                        + new String(buf, 0, 20, utf8) + "...");
            }

            // Move whatever we didn't process to the beginning of the buffer
            // and try again.
            if (fieldStart != amt) {
                // For a quoted field only the compacted content is carried over.
                readPos = (escaping ? escapeIndex : index) - fieldStart;
                System.arraycopy(buf, fieldStart, buf, 0, readPos);
            } else {
                readPos = 0;
            }

            // Process whatever's left over
            if (finalBuffer) {
                fields.add(new String(buf, 0, readPos, utf8));
                // If there is any content, return the last line.
                if (hasContent(fields)) {
                    processor.onLine(lineNumber, fields);
                }
            }
        }
    }

    /**
     * Returns true if the parsed line is worth reporting: it has more than one
     * field, or its only field is non-empty.
     */
    private static boolean hasContent(ArrayList<String> fields) {
        return fields.size() > 1 || (fields.size() == 1 && fields.get(0).length() > 0);
    }
}