| /* |
| * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * - Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * - Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * - Neither the name of Oracle nor the names of its |
| * contributors may be used to endorse or promote products derived |
| * from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS |
| * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
| * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| /* |
| * This source code is provided to illustrate the usage of a given feature |
| * or technique and has been deliberately simplified. Additional steps |
| * required for a production-quality application, such as security checks, |
| * input validation, and proper error handling, might not be present in |
| * this sample code. |
| */ |
| |
| import java.io.BufferedReader; |
| import java.io.IOException; |
| import java.nio.file.Files; |
| import java.nio.file.Paths; |
| import java.util.*; |
| import java.util.function.*; |
| import java.util.regex.Pattern; |
| import java.util.stream.Collector; |
| import java.util.stream.Collectors; |
| |
| import static java.lang.Double.parseDouble; |
| import static java.util.stream.Collectors.*; |
| |
/**
 * CSVProcessor is a tool for processing CSV files. There are several
 * command-line options. Consult the {@link #printUsageAndExit} method for
 * instructions and command line parameters. This sample shows examples of the
 * following features:
 * <ul>
 * <li>Lambda and bulk operations. Working with streams: map(...), filter(...),
 * sorted(...) methods. The collect(...) method with different collectors:
 * Collectors.maxBy(...), Collectors.minBy(...), Collectors.toList(),
 * Collectors.toCollection(...), Collectors.groupingBy(...),
 * Collectors.summarizingDouble(...), and a custom Collector.</li>
 * <li>Static method reference for printing values.</li>
 * <li>Try-with-resources feature for closing files.</li>
 * <li>Switch by String feature.</li>
 * <li>Other new APIs: Pattern.asPredicate(), BinaryOperator,
 * BufferedReader.lines(), Collection.forEach(...), Comparator.comparing(...),
 * Comparator.reversed(), Arrays.stream(...).</li>
 * </ul>
 *
 * <p>Records are split on every comma; quoted fields that contain commas are
 * not supported.
 */
public class CSVProcessor {

    /**
     * The main method for the CSVProcessor program. Run the program with an
     * empty argument list to see possible arguments.
     *
     * @param args the argument list for CSVProcessor. The first argument is
     * the command, the second is the column name and the last one is the
     * path of the CSV file.
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            printUsageAndExit();
        }
        //Files.newBufferedReader(...) already returns a BufferedReader.
        try (BufferedReader br
                = Files.newBufferedReader(Paths.get(args[args.length - 1]))) {
            //Assume that the first line contains column names.
            String headerLine = br.readLine();
            if (headerLine == null) {
                printUsageAndExit("The file is empty: no header line found");
                return;//Should not be reached.
            }
            List<String> header = Arrays.stream(headerLine.split(","))
                    .map(String::trim).collect(toList());
            //Calculate an index of the column in question.
            int column = getColumnNumber(header, args[1]);
            switch (args[0]) {
                case "sort":
                    verifyArgumentNumber(args, 4);
                    //Define the sort order.
                    boolean isAsc;
                    switch (args[2].toUpperCase(Locale.ROOT)) {
                        case "ASC":
                            isAsc = true;
                            break;
                        case "DESC":
                            isAsc = false;
                            break;
                        default:
                            printUsageAndExit("Illegal argument: " + args[2]);
                            return;//Should not be reached.
                    }
                    /*
                     * Create a comparator that compares lines by comparing
                     * values in the specified column.
                     */
                    Comparator<String> cmp
                            = Comparator.comparing(str -> getCell(str, column),
                                    String.CASE_INSENSITIVE_ORDER);
                    /*
                     * sorted(...) is used to sort records.
                     * forEach(...) is used to output sorted records.
                     */
                    br.lines().sorted(isAsc ? cmp : cmp.reversed())
                            .forEach(System.out::println);
                    break;
                case "search":
                    verifyArgumentNumber(args, 4);
                    /*
                     * Records are filtered by a regex.
                     * forEach(...) is used to output filtered records.
                     */
                    Predicate<String> pattern
                            = Pattern.compile(args[2]).asPredicate();
                    br.lines().filter(str -> pattern.test(getCell(str, column)))
                            .forEach(System.out::println);
                    break;
                case "groupby":
                    verifyArgumentNumber(args, 3);
                    /*
                     * Group lines by values in the column with collect(...), and
                     * print with forEach(...) for every distinct value within
                     * the column. Each group is a TreeSet, so the lines of a
                     * group are printed sorted and without duplicates.
                     */
                    br.lines().collect(
                            Collectors.groupingBy(str -> getCell(str, column),
                                    toCollection(TreeSet::new)))
                            .forEach((str, set) -> {
                                System.out.println(str + ":");
                                set.forEach(System.out::println);
                            });
                    break;
                case "stat":
                    verifyArgumentNumber(args, 3);

                    /*
                     * The records are processed several times, so they are
                     * read into memory once. The header line is not included
                     * because it is already read.
                     */
                    List<String> records = br.lines().collect(toList());

                    /*
                     * Statistics can be collected by a custom collector in one
                     * pass. One pass is preferable.
                     */
                    System.out.println(
                            records.stream().collect(new Statistics(column)));

                    /*
                     * Alternatively, statistics can be collected
                     * by a built-in API in several passes.
                     * This method demonstrates how separate operations can be
                     * implemented using a built-in API.
                     */
                    statInSeveralPasses(records, column);
                    break;
                default:
                    printUsageAndExit("Illegal argument: " + args[0]);
            }
        } catch (IOException | java.io.UncheckedIOException
                | NumberFormatException e) {
            /*
             * BufferedReader.lines() wraps I/O failures in an
             * UncheckedIOException; parseDouble(...) rejects non-numeric cells
             * with a NumberFormatException. Both are reported like any other
             * I/O problem.
             */
            printUsageAndExit(e.toString());
        }
    }

    /**
     * Prints max, min, average and sum for the column, running one stream
     * over the records per built-in collector.
     *
     * @param records the data lines of the CSV file, without the header
     * @param column the index of the column to analyze
     */
    private static void statInSeveralPasses(List<String> records, int column) {
        System.out.println("#-----Statistics in several passes-------#");
        //Create a comparator to compare records by the column.
        Comparator<String> comparator = Comparator.comparingDouble(
                (String str) -> parseDouble(getCell(str, column)));
        //Find max record by using Collectors.maxBy(...).
        //The result is empty when there are no records; null is printed then.
        System.out.println("Max: "
                + records.stream().collect(maxBy(comparator)).orElse(null));
        //Find min record by using Collectors.minBy(...)
        System.out.println("Min: "
                + records.stream().collect(minBy(comparator)).orElse(null));
        //Compute the average value and sum with
        //Collectors.summarizingDouble(...)
        DoubleSummaryStatistics doubleSummaryStatistics
                = records.stream().collect(summarizingDouble(
                        str -> parseDouble(getCell(str, column))));
        System.out.println("Average: " + doubleSummaryStatistics.getAverage());
        System.out.println("Sum: " + doubleSummaryStatistics.getSum());
    }

    /**
     * Prints the usage and exits unless exactly {@code n} arguments are given.
     *
     * @param args the command-line arguments
     * @param n the expected number of arguments
     */
    private static void verifyArgumentNumber(String[] args, int n) {
        if (args.length != n) {
            printUsageAndExit("Expected " + n + " arguments but was "
                    + args.length);
        }
    }

    /**
     * Finds the index of the column with the given name. Prints the usage and
     * exits when the header has no such column.
     *
     * @param header the trimmed column names from the first line of the file
     * @param name the column name to look for
     * @return the zero-based index of the column
     */
    private static int getColumnNumber(List<String> header, String name) {
        int column = header.indexOf(name);
        if (column == -1) {
            printUsageAndExit("There is no column with name " + name);
        }
        return column;
    }

    /**
     * Extracts the trimmed value of one cell from a comma-separated line.
     *
     * @param line a data line of the CSV file
     * @param column the zero-based index of the cell
     * @return the trimmed cell value, or an empty string if the line has
     * fewer than {@code column + 1} cells
     */
    private static String getCell(String line, int column) {
        //A negative limit keeps trailing empty cells, e.g. "a,b," has 3 cells.
        String[] cells = line.split(",", -1);
        return column < cells.length ? cells[column].trim() : "";
    }

    /**
     * Prints the usage to the standard output, then the given messages to the
     * standard error, and terminates the JVM with exit status 1.
     *
     * @param str the error messages to print after the usage, if any
     */
    private static void printUsageAndExit(String... str) {
        System.out.println("Usages:");

        System.out.println("CSVProcessor sort COLUMN_NAME ASC|DESC FILE");
        System.out.println("Sort lines by column COLUMN_NAME in CSV FILE\n");

        System.out.println("CSVProcessor search COLUMN_NAME REGEX FILE");
        System.out.println("Search for REGEX in column COLUMN_NAME in CSV FILE\n");

        System.out.println("CSVProcessor groupby COLUMN_NAME FILE");
        System.out.println("Split lines into different groups according to column "
                + "COLUMN_NAME value\n");

        System.out.println("CSVProcessor stat COLUMN_NAME FILE");
        System.out.println("Compute max/min/average/sum statistics by column "
                + "COLUMN_NAME\n");

        Arrays.asList(str).forEach(System.err::println);
        System.exit(1);
    }

    /*
     * This is a custom implementation of the Collector interface.
     * A Statistics object gathers max, min, sum and average statistics for
     * one column. The same class serves as the collector and as its mutable
     * result container.
     */
    private static class Statistics
            implements Collector<String, Statistics, Statistics> {

        /*
         * This implementation does not need to be thread safe because
         * the parallel implementation of
         * {@link java.util.stream.Stream#collect Stream.collect()}
         * provides the necessary partitioning and isolation for safe parallel
         * execution.
         */
        private String maxRecord;
        private String minRecord;

        private double sum;
        private int lineCount;
        private final BinaryOperator<String> maxOperator;
        private final BinaryOperator<String> minOperator;
        private final int column;

        /*
         * Create an empty Statistics for the column with the given index.
         */
        public Statistics(int column) {
            this.column = column;
            Comparator<String> cmp = Comparator.comparingDouble(
                    (String str) -> parseDouble(getCell(str, column)));
            maxOperator = BinaryOperator.maxBy(cmp);
            minOperator = BinaryOperator.minBy(cmp);
        }

        /*
         * Process line.
         */
        public Statistics accept(String line) {
            maxRecord = maxRecord == null
                    ? line : maxOperator.apply(maxRecord, line);
            minRecord = minRecord == null
                    ? line : minOperator.apply(minRecord, line);

            sum += parseDouble(getCell(line, column));
            lineCount++;
            return this;
        }

        /*
         * Merge two Statistics. Either side may be empty, in which case its
         * records are null and must not be passed to the comparators.
         */
        public Statistics combine(Statistics stat) {
            if (stat.getLineCount() == 0) {
                return this;
            }
            if (lineCount == 0) {
                maxRecord = stat.getMaxRecord();
                minRecord = stat.getMinRecord();
            } else {
                maxRecord = maxOperator.apply(maxRecord, stat.getMaxRecord());
                minRecord = minOperator.apply(minRecord, stat.getMinRecord());
            }
            sum += stat.getSum();
            lineCount += stat.getLineCount();
            return this;
        }

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("#------Statistics------#\n");
            sb.append("Max: ").append(getMaxRecord()).append("\n");
            sb.append("Min: ").append(getMinRecord()).append("\n");
            sb.append("Sum = ").append(getSum()).append("\n");
            sb.append("Average = ").append(average()).append("\n");
            sb.append("#------Statistics------#\n");
            return sb.toString();
        }

        @Override
        public Supplier<Statistics> supplier() {
            return () -> new Statistics(column);
        }

        @Override
        public BiConsumer<Statistics, String> accumulator() {
            return Statistics::accept;
        }

        @Override
        public BinaryOperator<Statistics> combiner() {
            return Statistics::combine;
        }

        @Override
        public Function<Statistics, Statistics> finisher() {
            return stat -> stat;
        }

        @Override
        public Set<Characteristics> characteristics() {
            return EnumSet.of(Characteristics.IDENTITY_FINISH);
        }

        private String getMaxRecord() {
            return maxRecord;
        }

        private String getMinRecord() {
            return minRecord;
        }

        private double getSum() {
            return sum;
        }

        //The result is NaN when no line has been processed.
        private double average() {
            return sum / lineCount;
        }

        private int getLineCount() {
            return lineCount;
        }

    }

}