Add csv parser for the power profile library.

There are enough variants of CSV, and the format is simple enough,
that it is easier to maintain our own parser than to introduce a
dependency on another library, since this library will be used
in a variety of environments.

Test: atest frameworks/base/tools/powermodel --host
Change-Id: Ib0f7aceb2a58b58f447f6bcef5c95729303dee8a
This commit is contained in:
Joe Onorato
2018-11-16 16:08:15 -08:00
parent 9112a5e07a
commit 7d1851fe49
2 changed files with 484 additions and 0 deletions

View File

@@ -0,0 +1,173 @@
/*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.powermodel;
import java.io.InputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
/**
 * Parses CSV.
 * <p>
 * Call {@link #parse} with an InputStream.
 * <p>
 * {@link LineProcessor#onLine} will be called for each non-blank line in the
 * source document. Field bytes are decoded as UTF-8.
 * <p>
 * To simplify parsing and to protect against using too much memory for bad
 * data, the maximum field length is {@link #MAX_FIELD_SIZE}.
 */
class CsvParser {
    /**
     * The maximum size of a single field in bytes.
     */
    public static final int MAX_FIELD_SIZE = (8*1024)-1;

    /**
     * Callback interface for each line of CSV as it is parsed.
     */
    interface LineProcessor {
        /**
         * A line of CSV was parsed.
         *
         * @param lineNumber the line number in the file, starting at 1
         * @param fields the comma separated fields for the line
         */
        void onLine(int lineNumber, ArrayList<String> fields) throws ParseException;
    }

    /**
     * Parse the CSV text in input, calling onto processor for each row.
     * <p>
     * Fields are separated by commas and rows by {@code \r}, {@code \n} or
     * {@code \r\n}. A field whose first character is {@code "} is quoted: inside
     * it a doubled quote stands for one literal quote and commas are literal.
     * Newlines always end the row, even inside a quoted field. Rows that consist
     * of a single empty field are not reported. The stream is read until end of
     * file and is not closed by this method.
     *
     * @param input the raw CSV bytes; fields are decoded as UTF-8
     * @param processor receives each non-blank row, in order
     * @throws IOException if reading from input fails
     * @throws ParseException if a single pending field fills the entire read buffer
     *         (more than {@link #MAX_FIELD_SIZE} bytes), or if processor throws it
     */
    public static void parse(InputStream input, LineProcessor processor)
            throws IOException, ParseException {
        // Always decode with an explicit charset. The no-charset String constructor
        // uses the platform default, which corrupts non-ASCII fields on hosts whose
        // default is not UTF-8. Splitting only ever happens at ASCII delimiters, so
        // a completed field never contains a truncated multi-byte sequence.
        final Charset utf8 = StandardCharsets.UTF_8;
        final byte[] buf = new byte[MAX_FIELD_SIZE+1];

        int lineNumber = 1;
        // Number of bytes of an unfinished field carried over at the start of buf.
        int readPos = 0;
        // Previous byte seen, used to recognize \r\n pairs.
        int prev = 0;
        ArrayList<String> fields = new ArrayList<String>();
        boolean finalBuffer = false;
        // True while inside a quoted field. Persists across buffer refills.
        boolean escaping = false;
        // True when the previous byte of a quoted field was a lone ". Persists across
        // buffer refills.
        boolean sawQuote = false;

        while (!finalBuffer) {
            int amt = input.read(buf, readPos, buf.length-readPos);
            if (amt < 0) {
                // No more data. Process whatever's left from before.
                amt = readPos;
                finalBuffer = true;
            } else {
                // Process whatever's left from before, plus the new data.
                amt += readPos;
                finalBuffer = false;
            }

            // Process as much of this buffer as we can.
            int fieldStart = 0;
            int index = readPos;
            // Quoted fields are unescaped in place: escapeIndex is the write cursor,
            // index is the read cursor.
            int escapeIndex = escaping ? readPos : -1;
            while (index < amt) {
                byte c = buf[index];
                if (c == '\r' || c == '\n') {
                    if (escaping) {
                        // TODO: Quotes do not escape newlines in our CSV dialect,
                        // but we actually see some data where it should.
                        fields.add(new String(buf, fieldStart, escapeIndex-fieldStart, utf8));
                        escapeIndex = -1;
                        escaping = false;
                        sawQuote = false;
                    } else {
                        fields.add(new String(buf, fieldStart, index-fieldStart, utf8));
                    }
                    // Don't report blank lines
                    if (hasContent(fields)) {
                        processor.onLine(lineNumber, fields);
                    }
                    fields = new ArrayList<String>();
                    if (!(c == '\n' && prev == '\r')) {
                        // Don't double increment for dos line endings.
                        lineNumber++;
                    }
                    fieldStart = index = index + 1;
                } else {
                    if (escaping) {
                        // Field started with a " so quotes are escaped with " and commas
                        // don't matter except when following a single quote.
                        if (c == '"') {
                            if (sawQuote) {
                                // Second quote of a pair: emit one literal quote.
                                buf[escapeIndex] = buf[index];
                                escapeIndex++;
                                sawQuote = false;
                            } else {
                                sawQuote = true;
                            }
                            index++;
                        } else if (sawQuote && c == ',') {
                            // Closing quote followed by a comma ends the field.
                            fields.add(new String(buf, fieldStart, escapeIndex-fieldStart, utf8));
                            fieldStart = index = index + 1;
                            escapeIndex = -1;
                            escaping = false;
                            sawQuote = false;
                        } else {
                            buf[escapeIndex] = buf[index];
                            escapeIndex++;
                            index++;
                            sawQuote = false;
                        }
                    } else {
                        if (c == ',') {
                            fields.add(new String(buf, fieldStart, index-fieldStart, utf8));
                            fieldStart = index + 1;
                        } else if (c == '"' && fieldStart == index) {
                            // First character is a "
                            escaping = true;
                            fieldStart = escapeIndex = index + 1;
                        }
                        index++;
                    }
                }
                prev = c;
            }

            // A single field is greater than buf.length, so fail.
            if (fieldStart == 0 && index == buf.length) {
                throw new ParseException(lineNumber, "Line is too long: "
                        + new String(buf, 0, 20, utf8) + "...");
            }

            // Move whatever we didn't process to the beginning of the buffer
            // and try again.
            if (fieldStart != amt) {
                readPos = (escaping ? escapeIndex : index) - fieldStart;
                System.arraycopy(buf, fieldStart, buf, 0, readPos);
            } else {
                readPos = 0;
            }

            // Process whatever's left over
            if (finalBuffer) {
                fields.add(new String(buf, 0, readPos, utf8));
                // If there is any content, return the last line.
                if (hasContent(fields)) {
                    processor.onLine(lineNumber, fields);
                }
            }
        }
    }

    /**
     * Returns true unless the row is blank, i.e. has no fields or is a single empty field.
     */
    private static boolean hasContent(ArrayList<String> fields) {
        return fields.size() > 1 || (fields.size() == 1 && fields.get(0).length() > 0);
    }
}

View File

@@ -0,0 +1,311 @@
/*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.powermodel;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import org.junit.Assert;
import org.junit.Test;
/**
 * Tests {@link CsvParser}
 */
public class CsvParserTest {
    /**
     * Collects every row reported by the parser, in the order received.
     * Static so it does not hold a reference to the enclosing test instance.
     */
    static class LineCollector implements CsvParser.LineProcessor {
        ArrayList<ArrayList<String>> results = new ArrayList<ArrayList<String>>();

        @Override
        public void onLine(int lineNumber, ArrayList<String> fields) {
            results.add(fields);
        }
    }

    /**
     * Runs the parser over the UTF-8 bytes of text and returns the rows it reported.
     */
    private static ArrayList<ArrayList<String>> parse(String text) throws Exception {
        final InputStream is = new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8));
        final LineCollector collector = new LineCollector();
        CsvParser.parse(is, collector);
        return collector.results;
    }

    /**
     * Asserts that the collected rows equal expected, row by row and field by field.
     */
    private void assertEquals(String[][] expected, ArrayList<ArrayList<String>> results) {
        final String[][] resultArray = new String[results.size()][];
        for (int i=0; i<results.size(); i++) {
            final ArrayList<String> list = results.get(i);
            resultArray[i] = list.toArray(new String[list.size()]);
        }
        Assert.assertArrayEquals(expected, resultArray);
    }

    /**
     * Returns a string of the given length consisting only of the letter 'a'.
     */
    private String makeString(int length) {
        final StringBuilder str = new StringBuilder(length);
        for (int i=0; i<length; i++) {
            str.append('a');
        }
        return str.toString();
    }

    @Test public void testEmpty() throws Exception {
        assertEquals(new String[][] {
        }, parse(""));
    }

    @Test public void testOnlyNewline() throws Exception {
        assertEquals(new String[][] {
        }, parse("\n"));
    }

    @Test public void testTwoLines() throws Exception {
        assertEquals(new String[][] {
            { "one", "twoo", "3", },
            { "four", "5", "six", },
        }, parse("one,twoo,3\nfour,5,six\n"));
    }

    @Test public void testEscapedEmpty() throws Exception {
        assertEquals(new String[][] {
            { "", "", "", },
        }, parse("\"\",\"\",\"\"\n"));
    }

    @Test public void testEscapedText() throws Exception {
        assertEquals(new String[][] {
            { "one", "twoo", "3", },
        }, parse("\"one\",\"twoo\",\"3\"\n"));
    }

    @Test public void testEscapedQuotes() throws Exception {
        assertEquals(new String[][] {
            { "\"", "\"\"", "\"", },
        }, parse("\"\"\"\",\"\"\"\"\"\",\"\"\"\"\n"));
    }

    @Test public void testEscapedCommas() throws Exception {
        assertEquals(new String[][] {
            { ",", ",", ",", },
        }, parse("\",\",\",\",\",\"\n"));
    }

    @Test public void testEscapedQuotesAndCommas() throws Exception {
        assertEquals(new String[][] {
            { "\",", "\",", "\",", },
        }, parse("\"\"\",\",\"\"\",\",\"\"\",\"\n"));
    }

    @Test public void testNoNewline() throws Exception {
        assertEquals(new String[][] {
            { "a", "b", "c", }
        }, parse("a,b,c"));
    }

    @Test public void testNoNewlineWithCommas() throws Exception {
        assertEquals(new String[][] {
            { "a", "b", "", "" }
        }, parse("a,b,,"));
    }

    @Test public void testNoNewlineWithQuote() throws Exception {
        assertEquals(new String[][] {
            { "a", "b", "," }
        }, parse("a,b,\",\""));
    }

    @Test public void testNoCommas() throws Exception {
        assertEquals(new String[][] {
            { "aasdfadfadfad", }
        }, parse("aasdfadfadfad"));
    }

    @Test public void testMaxLength() throws Exception {
        final String text = makeString(CsvParser.MAX_FIELD_SIZE);
        assertEquals(new String[][] {
            { text, }
        }, parse(text));
    }

    @Test public void testMaxLengthTwice() throws Exception {
        final String big = makeString(CsvParser.MAX_FIELD_SIZE);
        assertEquals(new String[][] {
            { big, big, }
        }, parse(big + "," + big));
    }

    @Test public void testTooLong() throws Exception {
        final String text = makeString(CsvParser.MAX_FIELD_SIZE+1);
        try {
            parse(text);
            // Assert.fail throws AssertionError, which the catch below does not swallow.
            Assert.fail("Expected CsvParser.parse to throw ParseException");
        } catch (ParseException expected) {
            // good
        }
    }

    @Test public void testBufferBoundary() throws Exception {
        final String big = makeString(CsvParser.MAX_FIELD_SIZE-3);
        assertEquals(new String[][] {
            { big, "b", "c", "d", "e", "f", "g", }
        }, parse(big + ",b,c,d,e,f,g"));
    }

    @Test public void testBufferBoundaryEmpty() throws Exception {
        final String big = makeString(CsvParser.MAX_FIELD_SIZE-3);
        assertEquals(new String[][] {
            { big, "", "", "", "", "", "", }
        }, parse(big + ",,,,,,"));
    }

    // Checks that the escaping and sawQuote behavior is correct at the buffer boundary
    @Test public void testBufferBoundaryEscapingEven() throws Exception {
        final String big = makeString(CsvParser.MAX_FIELD_SIZE-2);
        assertEquals(new String[][] {
            { big, "\"\"\"\"\"", big }
        }, parse(big + ",\"\"\"\"\"\"\"\"\"\"\"\"," + big));
    }

    // Checks that the escaping and sawQuote behavior is correct at the buffer boundary
    @Test public void testBufferBoundaryEscapingOdd() throws Exception {
        final String big = makeString(CsvParser.MAX_FIELD_SIZE-3);
        assertEquals(new String[][] {
            { big, "\"\"\"\"\"", big }
        }, parse(big + ",\"\"\"\"\"\"\"\"\"\"\"\"," + big));
    }
}