darwin-x86/sample/lambda/BulkDataOperations/src/CSVProcessor.java - platform/prebuilts/jdk/jdk8 - Git at Google

 /*
  * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  *   - Redistributions of source code must retain the above copyright
  *     notice, this list of conditions and the following disclaimer.
  *
  *   - Redistributions in binary form must reproduce the above copyright
  *     notice, this list of conditions and the following disclaimer in the
  *     documentation and/or other materials provided with the distribution.
  *
  *   - Neither the name of Oracle nor the names of its
  *     contributors may be used to endorse or promote products derived
  *     from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 /*
  * This source code is provided to illustrate the usage of a given feature
  * or technique and has been deliberately simplified. Additional steps
  * required for a production-quality application, such as security checks,
  * input validation, and proper error handling, might not be present in
  * this sample code.
  */

 import java.io.BufferedReader;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Paths;
 import java.util.*;
 import java.util.function.*;
 import java.util.regex.Pattern;
 import java.util.stream.Collector;
 import java.util.stream.Collectors;

 import static java.lang.Double.parseDouble;
 import static java.util.stream.Collectors.*;

 /**
  * CSVProcessor is a tool for processing CSV files. There are several
  * command-line options. Consult the {@link #printUsageAndExit} method for
  * instructions and command line parameters. This sample shows examples of the
  * following features:
  * <ul>
  * <li>Lambda and bulk operations. Working with streams: map(...), filter(...),
  * sorted(...) methods. The collect(...) method with different collectors:
  * Collectors.maxBy(...), Collectors.minBy(...), Collectors.toList(),
  * Collectors.toCollection(...), Collectors.groupingBy(...),
  * Collectors.summarizingDouble(...), and a custom Collector.</li>
  * <li>Static method reference for printing values.</li>
  * <li>Try-with-resources feature for closing files.</li>
  * <li>Switch by String feature.</li>
  * <li>Other new APIs: Pattern.asPredicate(), BinaryOperator,
  * BufferedReader.lines(), Collection.forEach(...), Comparator.comparing(...),
  * Comparator.reversed(), Arrays.stream(...).</li>
  * </ul>
  * <p>
  * Records are split on every comma; quoted cells are not supported. A cell
  * that is missing from a record is treated as an empty string.
  */
 public class CSVProcessor {

     //Number of characters that may be read
     private static final int READ_AHEAD_LIMIT = 100_000_000;

     /**
      * The main method for the CSVProcessor program. Run the program with an
      * empty argument list to see possible arguments.
      *
      * @param args the argument list for CSVProcessor.
      */
     public static void main(String[] args) {
         if (args.length < 2) {
             printUsageAndExit();
         }
         String fileName = args[args.length - 1];
         /*
          * Files.newBufferedReader(...) already returns a BufferedReader,
          * so no additional wrapper is needed.
          */
         try (BufferedReader br = Files.newBufferedReader(Paths.get(fileName))) {
             //Assume that the first line contains column names.
             String headerLine = br.readLine();
             if (headerLine == null) {
                 //readLine() returns null when the file has no lines at all.
                 printUsageAndExit("File " + fileName + " is empty");
                 return;//Should not be reached.
             }
             List<String> header = Arrays.stream(headerLine.split(","))
                     .map(String::trim).collect(toList());
             //Calculate an index of the column in question.
             int column = getColumnNumber(header, args[1]);
             switch (args[0]) {
                 case "sort":
                     verifyArgumentNumber(args, 4);
                     sortRecords(br, column, isAscending(args[2]));
                     break;
                 case "search":
                     verifyArgumentNumber(args, 4);
                     searchRecords(br, column, args[2]);
                     break;
                 case "groupby":
                     verifyArgumentNumber(args, 3);
                     groupRecords(br, column);
                     break;
                 case "stat":
                     verifyArgumentNumber(args, 3);
                     printStatistics(br, column);
                     break;
                 default:
                     printUsageAndExit("Illegal argument: " + args[0]);
             }
         } catch (IOException | java.io.UncheckedIOException e) {
             /*
              * readLine(), mark() and reset() report failures with
              * IOException; BufferedReader.lines() wraps them in
              * UncheckedIOException. Both are reported the same way.
              */
             printUsageAndExit(e.toString());
         }
     }

     /**
      * Translates the sort order argument into a flag. The comparison is
      * case-insensitive and does not depend on the default locale.
      *
      * @param order the sort order argument, expected to be ASC or DESC.
      * @return true for ascending order, false for descending order.
      */
     private static boolean isAscending(String order) {
         switch (order.toUpperCase(Locale.ROOT)) {
             case "ASC":
                 return true;
             case "DESC":
                 return false;
             default:
                 printUsageAndExit("Illegal argument: " + order);
                 //printUsageAndExit(...) terminates the JVM.
                 throw new AssertionError("Should not be reached.");
         }
     }

     /**
      * Prints the remaining records sorted by the value in the column.
      * Values are compared as strings, ignoring case.
      *
      * @param br the reader positioned right after the header line.
      * @param column the index of the column to sort by.
      * @param ascending true to sort in ascending order.
      */
     private static void sortRecords(BufferedReader br, int column,
             boolean ascending) {
         /*
          * Create a comparator that compares lines by comparing
          * values in the specified column.
          */
         Comparator<String> cmp
                 = Comparator.comparing(str -> getCell(str, column),
                         String.CASE_INSENSITIVE_ORDER);
         /*
          * sorted(...) is used to sort records.
          * forEach(...) is used to output sorted records.
          */
         br.lines().sorted(ascending ? cmp : cmp.reversed())
                 .forEach(System.out::println);
     }

     /**
      * Prints the remaining records whose value in the column contains a
      * match for the regular expression.
      *
      * @param br the reader positioned right after the header line.
      * @param column the index of the column to search in.
      * @param regex the regular expression to look for.
      */
     private static void searchRecords(BufferedReader br, int column,
             String regex) {
         /*
          * Records are filtered by a regex.
          * forEach(...) is used to output filtered records.
          */
         Predicate<String> pattern = Pattern.compile(regex).asPredicate();
         br.lines().filter(str -> pattern.test(getCell(str, column)))
                 .forEach(System.out::println);
     }

     /**
      * Prints the remaining records grouped by the value in the column.
      * Records of one group are kept in a TreeSet, so they are printed in
      * their natural order and identical records are printed once.
      *
      * @param br the reader positioned right after the header line.
      * @param column the index of the column to group by.
      */
     private static void groupRecords(BufferedReader br, int column) {
         /*
          * Group lines by values in the column with collect(...), and
          * print with forEach(...) for every distinct value within
          * the column.
          */
         br.lines().collect(
                 Collectors.groupingBy(str -> getCell(str, column),
                         toCollection(TreeSet::new)))
                 .forEach((str, set) -> {
                     System.out.println(str + ":");
                     set.forEach(System.out::println);
                 });
     }

     /**
      * Prints max/min/sum/average statistics for the column twice: first
      * collected in one pass by the custom collector, then collected in
      * several passes by built-in collectors.
      *
      * @param br the reader positioned right after the header line.
      * @param column the index of the column with numeric values.
      * @throws IOException if the reader cannot be marked or reset.
      */
     private static void printStatistics(BufferedReader br, int column)
             throws IOException {
         /*
          * BufferedReader will be read several times.
          * Mark this point to return here after each pass.
          * BufferedReader will be read right after the headers line
          * because it is already read.
          */
         br.mark(READ_AHEAD_LIMIT);

         /*
          * Statistics can be collected by a custom collector in one
          * pass. One pass is preferable.
          */
         System.out.println(br.lines().collect(new Statistics(column)));

         /*
          * Alternatively, statistics can be collected
          * by a built-in API in several passes.
          * This method demonstrates how separate operations can be
          * implemented using a built-in API.
          */
         br.reset();
         statInSeveralPasses(br, column);
     }

     /**
      * Prints statistics for the column by reading the records three times.
      * The reader must have been marked at the first record.
      *
      * @param br the reader positioned at the mark.
      * @param column the index of the column with numeric values.
      * @throws IOException if the reader cannot be reset to the mark.
      */
     private static void statInSeveralPasses(BufferedReader br, int column)
             throws IOException {
         System.out.println("#-----Statistics in several passes-------#");
         //Create a comparator to compare records by the column.
         Comparator<String> comparator
                 = Comparator.comparing(
                         (String str) -> parseDouble(getCell(str, column)));
         /*
          * Find max record by using Collectors.maxBy(...). The result is
          * empty when there are no records; null is printed in that case,
          * exactly as the one-pass Statistics collector does.
          */
         System.out.println(
                 "Max: " + br.lines().collect(maxBy(comparator)).orElse(null));
         br.reset();
         //Find min record by using Collectors.minBy(...)
         System.out.println(
                 "Min: " + br.lines().collect(minBy(comparator)).orElse(null));
         br.reset();
         //Compute the average value and sum with
         //Collectors.summarizingDouble(...)
         DoubleSummaryStatistics doubleSummaryStatistics
                 = br.lines().collect(summarizingDouble(
                     str -> parseDouble(getCell(str, column))));
         System.out.println("Average: " + doubleSummaryStatistics.getAverage());
         System.out.println("Sum: " + doubleSummaryStatistics.getSum());
     }

     /**
      * Prints the usage and exits unless exactly n arguments were given.
      *
      * @param args the argument list of the program.
      * @param n the expected number of arguments.
      */
     private static void verifyArgumentNumber(String[] args, int n) {
         if (args.length != n) {
             printUsageAndExit("Expected " + n + " arguments but was "
                     + args.length);
         }
     }

     /**
      * Finds the index of the column with the given name. Prints the usage
      * and exits if there is no such column.
      *
      * @param header the trimmed column names from the first line.
      * @param name the column name to look for.
      * @return the zero-based index of the column.
      */
     private static int getColumnNumber(List<String> header, String name) {
         int column = header.indexOf(name);
         if (column == -1) {
             printUsageAndExit("There is no column with name " + name);
         }
         return column;
     }

     /**
      * Extracts the trimmed value of one cell from a record.
      *
      * @param record one line of the CSV file.
      * @param column the zero-based index of the cell.
      * @return the trimmed cell value, or an empty string if the record has
      * fewer cells than required.
      */
     private static String getCell(String record, int column) {
         /*
          * A negative limit keeps trailing empty cells. With the default
          * limit "a,b," is split into two cells only, and selecting the
          * third column fails.
          */
         String[] cells = record.split(",", -1);
         return column < cells.length ? cells[column].trim() : "";
     }

     /**
      * Prints the usage to the standard output, prints the given messages
      * to the standard error, and terminates the JVM with exit status 1.
      *
      * @param str the error messages to print after the usage.
      */
     private static void printUsageAndExit(String... str) {
         System.out.println("Usages:");

         System.out.println("CSVProcessor sort COLUMN_NAME ASC|DESC FILE");
         System.out.println("Sort lines by column COLUMN_NAME in CSV FILE\n");

         System.out.println("CSVProcessor search COLUMN_NAME REGEX FILE");
         System.out.println("Search for REGEX in column COLUMN_NAME in CSV FILE\n");

         System.out.println("CSVProcessor groupby COLUMN_NAME FILE");
         System.out.println("Split lines into different groups according to column "
                 + "COLUMN_NAME value\n");

         System.out.println("CSVProcessor stat COLUMN_NAME FILE");
         System.out.println("Compute max/min/average/sum  statistics by column "
                 + "COLUMN_NAME\n");

         Arrays.asList(str).forEach(System.err::println);
         System.exit(1);
     }

     /*
      * This is a custom implementation of the Collector interface.
      * A Statistics object gathers max, min, sum and average statistics
      * for the numeric values of one column. The same class serves as the
      * collector and as its mutable result container.
      */
     private static class Statistics
             implements Collector<String, Statistics, Statistics> {


         /*
          * This implementation does not need to be thread safe because
          * the parallel implementation of
          * {@link java.util.stream.Stream#collect Stream.collect()}
          * provides the necessary partitioning and isolation for safe parallel
          * execution.
          */
         private String maxRecord;
         private String minRecord;

         private double sum;
         private int lineCount;
         private final BinaryOperator<String> maxOperator;
         private final BinaryOperator<String> minOperator;
         private final int column;

         /*
          * Create an empty Statistics for the column with the given index.
          */
         public Statistics(int column) {
             this.column = column;
             Comparator<String> cmp = Comparator.comparing(
                     (String str) -> parseDouble(getCell(str, column)));
             maxOperator = BinaryOperator.maxBy(cmp);
             minOperator = BinaryOperator.minBy(cmp);
         }

         /*
          * Process line.
          */
         public Statistics accept(String line) {
             maxRecord = maxRecord == null
                     ? line : maxOperator.apply(maxRecord, line);
             minRecord = minRecord == null
                     ? line : minOperator.apply(minRecord, line);

             sum += parseDouble(getCell(line, column));
             lineCount++;
             return this;
         }


         /*
          * Merge two Statistics. Either side may be empty (no line was
          * accepted yet); its records are null then and must not be passed
          * to the comparator.
          */
         public Statistics combine(Statistics stat) {
             if (stat.getLineCount() == 0) {
                 return this;
             }
             if (lineCount == 0) {
                 maxRecord = stat.getMaxRecord();
                 minRecord = stat.getMinRecord();
             } else {
                 maxRecord = maxOperator.apply(maxRecord, stat.getMaxRecord());
                 minRecord = minOperator.apply(minRecord, stat.getMinRecord());
             }
             sum += stat.getSum();
             lineCount += stat.getLineCount();
             return this;
         }

         @Override
         public String toString() {
             StringBuilder sb = new StringBuilder();
             sb.append("#------Statistics------#\n");
             sb.append("Max: ").append(getMaxRecord()).append("\n");
             sb.append("Min: ").append(getMinRecord()).append("\n");
             sb.append("Sum = ").append(getSum()).append("\n");
             sb.append("Average = ").append(average()).append("\n");
             sb.append("#------Statistics------#\n");
             return sb.toString();
         }

         @Override
         public Supplier<Statistics> supplier() {
             return () -> new Statistics(column);
         }

         @Override
         public BiConsumer<Statistics, String> accumulator() {
             return Statistics::accept;
         }

         @Override
         public BinaryOperator<Statistics> combiner() {
             return Statistics::combine;

         }

         @Override
         public Function<Statistics, Statistics> finisher() {
             return stat -> stat;
         }

         @Override
         public Set<Characteristics> characteristics() {
             return EnumSet.of(Characteristics.IDENTITY_FINISH);
         }

         private String getMaxRecord() {
             return maxRecord;
         }

         private String getMinRecord() {
             return minRecord;
         }

         private double getSum() {
             return sum;
         }

         /*
          * The result is NaN when no line has been accepted.
          */
         private double average() {
             return sum / lineCount;
         }

         private int getLineCount() {
             return lineCount;
         }

     }

 }
	/*
	* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	*
	* - Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	*
	* - Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* - Neither the name of Oracle nor the names of its
	* contributors may be used to endorse or promote products derived
	* from this software without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
	* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
	* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	/*
	* This source code is provided to illustrate the usage of a given feature
	* or technique and has been deliberately simplified. Additional steps
	* required for a production-quality application, such as security checks,
	* input validation, and proper error handling, might not be present in
	* this sample code.
	*/

	import java.io.BufferedReader;
	import java.io.IOException;
	import java.nio.file.Files;
	import java.nio.file.Paths;
	import java.util.*;
	import java.util.function.*;
	import java.util.regex.Pattern;
	import java.util.stream.Collector;
	import java.util.stream.Collectors;

	import static java.lang.Double.parseDouble;
	import static java.util.stream.Collectors.*;

	/**
	 * CSVProcessor is a tool for processing CSV files. There are several
	 * command-line options. Consult the {@link #printUsageAndExit} method for
	 * instructions and command line parameters. This sample shows examples of the
	 * following features:
	 * <ul>
	 * <li>Lambda and bulk operations. Working with streams: map(...), filter(...),
	 * sorted(...) methods. The collect(...) method with different collectors:
	 * Collectors.maxBy(...), Collectors.minBy(...), Collectors.toList(),
	 * Collectors.toCollection(...), Collectors.groupingBy(...),
	 * Collectors.summarizingDouble(...), and a custom Collector.</li>
	 * <li>Static method reference for printing values.</li>
	 * <li>Try-with-resources feature for closing files.</li>
	 * <li>Switch by String feature.</li>
	 * <li>Other new APIs: Pattern.asPredicate(), BinaryOperator,
	 * BufferedReader.lines(), Collection.forEach(...), Comparator.comparing(...),
	 * Comparator.reversed(), Arrays.stream(...).</li>
	 * </ul>
	 * <p>
	 * Records are split on every comma; quoted cells are not supported. A cell
	 * that is missing from a record is treated as an empty string.
	 */
	public class CSVProcessor {

	    //Number of characters that may be read
	    private static final int READ_AHEAD_LIMIT = 100_000_000;

	    /**
	     * The main method for the CSVProcessor program. Run the program with an
	     * empty argument list to see possible arguments.
	     *
	     * @param args the argument list for CSVProcessor.
	     */
	    public static void main(String[] args) {
	        if (args.length < 2) {
	            printUsageAndExit();
	        }
	        String fileName = args[args.length - 1];
	        /*
	         * Files.newBufferedReader(...) already returns a BufferedReader,
	         * so no additional wrapper is needed.
	         */
	        try (BufferedReader br = Files.newBufferedReader(Paths.get(fileName))) {
	            //Assume that the first line contains column names.
	            String headerLine = br.readLine();
	            if (headerLine == null) {
	                //readLine() returns null when the file has no lines at all.
	                printUsageAndExit("File " + fileName + " is empty");
	                return;//Should not be reached.
	            }
	            List<String> header = Arrays.stream(headerLine.split(","))
	                    .map(String::trim).collect(toList());
	            //Calculate an index of the column in question.
	            int column = getColumnNumber(header, args[1]);
	            switch (args[0]) {
	                case "sort":
	                    verifyArgumentNumber(args, 4);
	                    sortRecords(br, column, isAscending(args[2]));
	                    break;
	                case "search":
	                    verifyArgumentNumber(args, 4);
	                    searchRecords(br, column, args[2]);
	                    break;
	                case "groupby":
	                    verifyArgumentNumber(args, 3);
	                    groupRecords(br, column);
	                    break;
	                case "stat":
	                    verifyArgumentNumber(args, 3);
	                    printStatistics(br, column);
	                    break;
	                default:
	                    printUsageAndExit("Illegal argument: " + args[0]);
	            }
	        } catch (IOException | java.io.UncheckedIOException e) {
	            /*
	             * readLine(), mark() and reset() report failures with
	             * IOException; BufferedReader.lines() wraps them in
	             * UncheckedIOException. Both are reported the same way.
	             */
	            printUsageAndExit(e.toString());
	        }
	    }

	    /**
	     * Translates the sort order argument into a flag. The comparison is
	     * case-insensitive and does not depend on the default locale.
	     *
	     * @param order the sort order argument, expected to be ASC or DESC.
	     * @return true for ascending order, false for descending order.
	     */
	    private static boolean isAscending(String order) {
	        switch (order.toUpperCase(Locale.ROOT)) {
	            case "ASC":
	                return true;
	            case "DESC":
	                return false;
	            default:
	                printUsageAndExit("Illegal argument: " + order);
	                //printUsageAndExit(...) terminates the JVM.
	                throw new AssertionError("Should not be reached.");
	        }
	    }

	    /**
	     * Prints the remaining records sorted by the value in the column.
	     * Values are compared as strings, ignoring case.
	     *
	     * @param br the reader positioned right after the header line.
	     * @param column the index of the column to sort by.
	     * @param ascending true to sort in ascending order.
	     */
	    private static void sortRecords(BufferedReader br, int column,
	            boolean ascending) {
	        /*
	         * Create a comparator that compares lines by comparing
	         * values in the specified column.
	         */
	        Comparator<String> cmp
	                = Comparator.comparing(str -> getCell(str, column),
	                        String.CASE_INSENSITIVE_ORDER);
	        /*
	         * sorted(...) is used to sort records.
	         * forEach(...) is used to output sorted records.
	         */
	        br.lines().sorted(ascending ? cmp : cmp.reversed())
	                .forEach(System.out::println);
	    }

	    /**
	     * Prints the remaining records whose value in the column contains a
	     * match for the regular expression.
	     *
	     * @param br the reader positioned right after the header line.
	     * @param column the index of the column to search in.
	     * @param regex the regular expression to look for.
	     */
	    private static void searchRecords(BufferedReader br, int column,
	            String regex) {
	        /*
	         * Records are filtered by a regex.
	         * forEach(...) is used to output filtered records.
	         */
	        Predicate<String> pattern = Pattern.compile(regex).asPredicate();
	        br.lines().filter(str -> pattern.test(getCell(str, column)))
	                .forEach(System.out::println);
	    }

	    /**
	     * Prints the remaining records grouped by the value in the column.
	     * Records of one group are kept in a TreeSet, so they are printed in
	     * their natural order and identical records are printed once.
	     *
	     * @param br the reader positioned right after the header line.
	     * @param column the index of the column to group by.
	     */
	    private static void groupRecords(BufferedReader br, int column) {
	        /*
	         * Group lines by values in the column with collect(...), and
	         * print with forEach(...) for every distinct value within
	         * the column.
	         */
	        br.lines().collect(
	                Collectors.groupingBy(str -> getCell(str, column),
	                        toCollection(TreeSet::new)))
	                .forEach((str, set) -> {
	                    System.out.println(str + ":");
	                    set.forEach(System.out::println);
	                });
	    }

	    /**
	     * Prints max/min/sum/average statistics for the column twice: first
	     * collected in one pass by the custom collector, then collected in
	     * several passes by built-in collectors.
	     *
	     * @param br the reader positioned right after the header line.
	     * @param column the index of the column with numeric values.
	     * @throws IOException if the reader cannot be marked or reset.
	     */
	    private static void printStatistics(BufferedReader br, int column)
	            throws IOException {
	        /*
	         * BufferedReader will be read several times.
	         * Mark this point to return here after each pass.
	         * BufferedReader will be read right after the headers line
	         * because it is already read.
	         */
	        br.mark(READ_AHEAD_LIMIT);

	        /*
	         * Statistics can be collected by a custom collector in one
	         * pass. One pass is preferable.
	         */
	        System.out.println(br.lines().collect(new Statistics(column)));

	        /*
	         * Alternatively, statistics can be collected
	         * by a built-in API in several passes.
	         * This method demonstrates how separate operations can be
	         * implemented using a built-in API.
	         */
	        br.reset();
	        statInSeveralPasses(br, column);
	    }

	    /**
	     * Prints statistics for the column by reading the records three times.
	     * The reader must have been marked at the first record.
	     *
	     * @param br the reader positioned at the mark.
	     * @param column the index of the column with numeric values.
	     * @throws IOException if the reader cannot be reset to the mark.
	     */
	    private static void statInSeveralPasses(BufferedReader br, int column)
	            throws IOException {
	        System.out.println("#-----Statistics in several passes-------#");
	        //Create a comparator to compare records by the column.
	        Comparator<String> comparator
	                = Comparator.comparing(
	                        (String str) -> parseDouble(getCell(str, column)));
	        /*
	         * Find max record by using Collectors.maxBy(...). The result is
	         * empty when there are no records; null is printed in that case,
	         * exactly as the one-pass Statistics collector does.
	         */
	        System.out.println(
	                "Max: " + br.lines().collect(maxBy(comparator)).orElse(null));
	        br.reset();
	        //Find min record by using Collectors.minBy(...)
	        System.out.println(
	                "Min: " + br.lines().collect(minBy(comparator)).orElse(null));
	        br.reset();
	        //Compute the average value and sum with
	        //Collectors.summarizingDouble(...)
	        DoubleSummaryStatistics doubleSummaryStatistics
	                = br.lines().collect(summarizingDouble(
	                    str -> parseDouble(getCell(str, column))));
	        System.out.println("Average: " + doubleSummaryStatistics.getAverage());
	        System.out.println("Sum: " + doubleSummaryStatistics.getSum());
	    }

	    /**
	     * Prints the usage and exits unless exactly n arguments were given.
	     *
	     * @param args the argument list of the program.
	     * @param n the expected number of arguments.
	     */
	    private static void verifyArgumentNumber(String[] args, int n) {
	        if (args.length != n) {
	            printUsageAndExit("Expected " + n + " arguments but was "
	                    + args.length);
	        }
	    }

	    /**
	     * Finds the index of the column with the given name. Prints the usage
	     * and exits if there is no such column.
	     *
	     * @param header the trimmed column names from the first line.
	     * @param name the column name to look for.
	     * @return the zero-based index of the column.
	     */
	    private static int getColumnNumber(List<String> header, String name) {
	        int column = header.indexOf(name);
	        if (column == -1) {
	            printUsageAndExit("There is no column with name " + name);
	        }
	        return column;
	    }

	    /**
	     * Extracts the trimmed value of one cell from a record.
	     *
	     * @param record one line of the CSV file.
	     * @param column the zero-based index of the cell.
	     * @return the trimmed cell value, or an empty string if the record has
	     * fewer cells than required.
	     */
	    private static String getCell(String record, int column) {
	        /*
	         * A negative limit keeps trailing empty cells. With the default
	         * limit "a,b," is split into two cells only, and selecting the
	         * third column fails.
	         */
	        String[] cells = record.split(",", -1);
	        return column < cells.length ? cells[column].trim() : "";
	    }

	    /**
	     * Prints the usage to the standard output, prints the given messages
	     * to the standard error, and terminates the JVM with exit status 1.
	     *
	     * @param str the error messages to print after the usage.
	     */
	    private static void printUsageAndExit(String... str) {
	        System.out.println("Usages:");

	        //"\|" is not a valid Java escape sequence; a plain '|' is meant.
	        System.out.println("CSVProcessor sort COLUMN_NAME ASC|DESC FILE");
	        System.out.println("Sort lines by column COLUMN_NAME in CSV FILE\n");

	        System.out.println("CSVProcessor search COLUMN_NAME REGEX FILE");
	        System.out.println("Search for REGEX in column COLUMN_NAME in CSV FILE\n");

	        System.out.println("CSVProcessor groupby COLUMN_NAME FILE");
	        System.out.println("Split lines into different groups according to column "
	                + "COLUMN_NAME value\n");

	        System.out.println("CSVProcessor stat COLUMN_NAME FILE");
	        System.out.println("Compute max/min/average/sum statistics by column "
	                + "COLUMN_NAME\n");

	        Arrays.asList(str).forEach(System.err::println);
	        System.exit(1);
	    }

	    /*
	     * This is a custom implementation of the Collector interface.
	     * A Statistics object gathers max, min, sum and average statistics
	     * for the numeric values of one column. The same class serves as the
	     * collector and as its mutable result container.
	     */
	    private static class Statistics
	            implements Collector<String, Statistics, Statistics> {


	        /*
	         * This implementation does not need to be thread safe because
	         * the parallel implementation of
	         * {@link java.util.stream.Stream#collect Stream.collect()}
	         * provides the necessary partitioning and isolation for safe parallel
	         * execution.
	         */
	        private String maxRecord;
	        private String minRecord;

	        private double sum;
	        private int lineCount;
	        private final BinaryOperator<String> maxOperator;
	        private final BinaryOperator<String> minOperator;
	        private final int column;

	        /*
	         * Create an empty Statistics for the column with the given index.
	         */
	        public Statistics(int column) {
	            this.column = column;
	            Comparator<String> cmp = Comparator.comparing(
	                    (String str) -> parseDouble(getCell(str, column)));
	            maxOperator = BinaryOperator.maxBy(cmp);
	            minOperator = BinaryOperator.minBy(cmp);
	        }

	        /*
	         * Process line.
	         */
	        public Statistics accept(String line) {
	            maxRecord = maxRecord == null
	                    ? line : maxOperator.apply(maxRecord, line);
	            minRecord = minRecord == null
	                    ? line : minOperator.apply(minRecord, line);

	            sum += parseDouble(getCell(line, column));
	            lineCount++;
	            return this;
	        }


	        /*
	         * Merge two Statistics. Either side may be empty (no line was
	         * accepted yet); its records are null then and must not be passed
	         * to the comparator.
	         */
	        public Statistics combine(Statistics stat) {
	            if (stat.getLineCount() == 0) {
	                return this;
	            }
	            if (lineCount == 0) {
	                maxRecord = stat.getMaxRecord();
	                minRecord = stat.getMinRecord();
	            } else {
	                maxRecord = maxOperator.apply(maxRecord, stat.getMaxRecord());
	                minRecord = minOperator.apply(minRecord, stat.getMinRecord());
	            }
	            sum += stat.getSum();
	            lineCount += stat.getLineCount();
	            return this;
	        }

	        @Override
	        public String toString() {
	            StringBuilder sb = new StringBuilder();
	            sb.append("#------Statistics------#\n");
	            sb.append("Max: ").append(getMaxRecord()).append("\n");
	            sb.append("Min: ").append(getMinRecord()).append("\n");
	            sb.append("Sum = ").append(getSum()).append("\n");
	            sb.append("Average = ").append(average()).append("\n");
	            sb.append("#------Statistics------#\n");
	            return sb.toString();
	        }

	        @Override
	        public Supplier<Statistics> supplier() {
	            return () -> new Statistics(column);
	        }

	        @Override
	        public BiConsumer<Statistics, String> accumulator() {
	            return Statistics::accept;
	        }

	        @Override
	        public BinaryOperator<Statistics> combiner() {
	            return Statistics::combine;

	        }

	        @Override
	        public Function<Statistics, Statistics> finisher() {
	            return stat -> stat;
	        }

	        @Override
	        public Set<Characteristics> characteristics() {
	            return EnumSet.of(Characteristics.IDENTITY_FINISH);
	        }

	        private String getMaxRecord() {
	            return maxRecord;
	        }

	        private String getMinRecord() {
	            return minRecord;
	        }

	        private double getSum() {
	            return sum;
	        }

	        /*
	         * The result is NaN when no line has been accepted.
	         */
	        private double average() {
	            return sum / lineCount;
	        }

	        private int getLineCount() {
	            return lineCount;
	        }

	    }

	}