make/jdk/src/classes/build/tools/publicsuffixlist/GeneratePublicSuffixList.java - platform/libcore - Git at Google

 /*
  * Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this
  * particular file as subject to the "Classpath" exception as provided
  * by Oracle in the LICENSE file that accompanied this code.
  *
  * This code is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  * version 2 for more details (a copy is included in the LICENSE file that
  * accompanied this code).
  *
  * You should have received a copy of the GNU General Public License version
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */

 package build.tools.publicsuffixlist;

 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.InputStreamReader;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.nio.file.attribute.FileTime;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipOutputStream;

 /**
  * This tool takes the original Mozilla public suffix rule list as input
  * and slices it into a set of files, one for each top-level domain.
  * Each file contains only the rules for that domain. Lines containing comments
  * or only whitespace are not copied. Each of these files are then combined
  * into the target zipfile.
  *
  * Usage: java GeneratePublicSuffixList mozilla_file destination_zipfile
  */
 public final class GeneratePublicSuffixList {
     // Prefix that marks a comment line in the rule list.
     private static final String COMMENT = "//";
     // Comment line after which every rule is tagged PRIVATE instead of ICANN.
     private static final String BEGIN_PRIVATE = "// ===BEGIN PRIVATE DOMAINS===";
     // Separator between a rule and anything that follows it on the same line.
     private static final Pattern WHITESPACE = Pattern.compile("\\s+");
     // Type tags written as the first character of every output line.
     private static final byte ICANN = 0x00;
     private static final byte PRIVATE = 0x01;

     /** A single rule together with the section (ICANN or PRIVATE) it came from. */
     private static class Domain {
         final String name;
         final byte type;
         Domain(String name, byte type) {
             this.name = name;
             this.type = type;
         }
     }

     /**
      * Reads the rule list named by {@code args[0]} and writes one zip entry per
      * top-level domain into the zip file named by {@code args[1]}.
      *
      * @param args exactly two elements: input rule file, output zip file
      * @throws IllegalArgumentException if the argument count is not two
      * @throws Exception if reading the input or writing the zip file fails
      */
     public static void main(String[] args) throws Exception {
         if (args.length != 2) {
             throw new IllegalArgumentException(
                 "2 args required: input_file output_file");
         }
         // The input is opened first, so a missing input file fails before the
         // output file is created.
         try (FileInputStream fis = new FileInputStream(args[0]);
              ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(args[1])))
         {
             BufferedReader br =
                 new BufferedReader(new InputStreamReader(fis, "UTF-8"));
             List<Domain> domains = readDomains(br);

             // Map of TLD names to rules with the same TLD
             Map<String, List<Domain>> rules = addDomains(domains);

             // Writer layered on the zip stream; it is only flushed, never
             // closed, because the try-with-resources statement owns zos.
             BufferedWriter bw =
                 new BufferedWriter(new OutputStreamWriter(zos, "UTF-8"));

             // now output each map entry to its own file,
             // whose filename is the TLD
             writeRules(zos, bw, rules);
         }
     }

     /**
      * Collects the rules from the reader in input order, skipping comment and
      * blank lines. Surrounding whitespace is dropped and a line is read only up
      * to its first whitespace, so trailing text never ends up in a rule or in
      * the zip entry name derived from it.
      *
      * @param br reader positioned at the start of the rule list
      * @return the rules, each tagged ICANN or PRIVATE
      * @throws IOException if reading fails
      */
     private static List<Domain> readDomains(BufferedReader br) throws IOException {
         List<Domain> domains = new LinkedList<>();
         byte type = ICANN;
         String line;
         while ((line = br.readLine()) != null) {
             String text = line.trim();
             if (text.startsWith(COMMENT)) {
                 if (text.startsWith(BEGIN_PRIVATE)) {
                     type = PRIVATE;
                 }
                 continue;
             }
             // text has no leading whitespace here, so element 0 is the rule
             // itself (or "" when the line was blank).
             String rule = WHITESPACE.split(text, 2)[0];
             if (rule.isEmpty()) {
                 continue;
             }
             domains.add(new Domain(rule, type));
         }
         return domains;
     }

     /**
      * Groups the rules by top-level domain, keeping input order in each group.
      *
      * @param domains rules to group
      * @return map from TLD to the rules that end in that TLD
      */
     private static Map<String, List<Domain>> addDomains(List<Domain> domains) {
         Map<String, List<Domain>> rules = new HashMap<>();
         for (Domain domain : domains) {
             rules.computeIfAbsent(getTLD(domain.name), tld -> new LinkedList<>())
                  .add(domain);
         }
         return rules;
     }

     /**
      * Writes one zip entry per TLD, in sorted TLD order. Each rule becomes one
      * line: a single character whose value is the rule type, then the rule.
      *
      * @param zos   zip stream that receives the entries
      * @param bw    writer layered on top of {@code zos}
      * @param rules map from TLD to its rules
      * @throws IOException if writing fails
      */
     private static void writeRules(ZipOutputStream zos, BufferedWriter bw,
                                    Map<String, List<Domain>> rules)
                                    throws IOException {
         // Sort keys for deterministic output
         List<String> tlds = rules.keySet().stream().sorted().collect(Collectors.toList());
         for (String tld : tlds) {
             ZipEntry ze = new ZipEntry(tld);
             // Constant timestamp: the archive does not depend on build time.
             ze.setLastModifiedTime(FileTime.fromMillis(0));
             zos.putNextEntry(ze);
             for (Domain entry : rules.get(tld)) {
                 bw.write(entry.type);
                 bw.write(entry.name, 0, entry.name.length());
                 bw.newLine();
             }
             // Push buffered characters into the entry before it is closed.
             bw.flush();
             zos.closeEntry();
         }
     }

     /**
      * Returns the text after the last '.' of the rule, or the whole rule when
      * it contains no '.'.
      */
     private static String getTLD(String line) {
         int dotIndex = line.lastIndexOf('.');
         return (dotIndex == -1) ? line : line.substring(dotIndex + 1);
     }
 }
	/*
	* Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved.
	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
	*
	* This code is free software; you can redistribute it and/or modify it
	* under the terms of the GNU General Public License version 2 only, as
	* published by the Free Software Foundation. Oracle designates this
	* particular file as subject to the "Classpath" exception as provided
	* by Oracle in the LICENSE file that accompanied this code.
	*
	* This code is distributed in the hope that it will be useful, but WITHOUT
	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	* version 2 for more details (a copy is included in the LICENSE file that
	* accompanied this code).
	*
	* You should have received a copy of the GNU General Public License version
	* 2 along with this work; if not, write to the Free Software Foundation,
	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
	*
	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
	* or visit www.oracle.com if you need additional information or have any
	* questions.
	*/

	package build.tools.publicsuffixlist;

	import java.io.BufferedReader;
	import java.io.BufferedWriter;
	import java.io.FileInputStream;
	import java.io.FileOutputStream;
	import java.io.InputStreamReader;
	import java.io.IOException;
	import java.io.OutputStreamWriter;
	import java.nio.file.attribute.FileTime;
	import java.util.HashMap;
	import java.util.LinkedList;
	import java.util.List;
	import java.util.Map;
	import java.util.Set;
	import java.util.regex.Pattern;
	import java.util.stream.Collectors;
	import java.util.zip.ZipEntry;
	import java.util.zip.ZipOutputStream;

	/**
	* This tool takes the original Mozilla public suffix rule list as input
	* and slices it into a set of files, one for each top-level domain.
	* Each file contains only the rules for that domain. Lines containing comments
	* or only whitespace are not copied. Each of these files are then combined
	* into the target zipfile.
	*
	* Usage: java GeneratePublicSuffixList mozilla_file destination_zipfile
	*/
	public final class GeneratePublicSuffixList {
	    // Prefix that marks a comment line in the rule list.
	    private static final String COMMENT = "//";
	    // Comment line after which every rule is tagged PRIVATE instead of ICANN.
	    private static final String BEGIN_PRIVATE = "// ===BEGIN PRIVATE DOMAINS===";
	    // Separator between a rule and anything that follows it on the same line.
	    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
	    // Type tags written as the first character of every output line.
	    private static final byte ICANN = 0x00;
	    private static final byte PRIVATE = 0x01;

	    /** A single rule together with the section (ICANN or PRIVATE) it came from. */
	    private static class Domain {
	        final String name;
	        final byte type;
	        Domain(String name, byte type) {
	            this.name = name;
	            this.type = type;
	        }
	    }

	    /**
	     * Reads the rule list named by {@code args[0]} and writes one zip entry per
	     * top-level domain into the zip file named by {@code args[1]}.
	     *
	     * @param args exactly two elements: input rule file, output zip file
	     * @throws IllegalArgumentException if the argument count is not two
	     * @throws Exception if reading the input or writing the zip file fails
	     */
	    public static void main(String[] args) throws Exception {
	        if (args.length != 2) {
	            throw new IllegalArgumentException(
	                "2 args required: input_file output_file");
	        }
	        // The input is opened first, so a missing input file fails before the
	        // output file is created.
	        try (FileInputStream fis = new FileInputStream(args[0]);
	             ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(args[1])))
	        {
	            BufferedReader br =
	                new BufferedReader(new InputStreamReader(fis, "UTF-8"));
	            List<Domain> domains = readDomains(br);

	            // Map of TLD names to rules with the same TLD
	            Map<String, List<Domain>> rules = addDomains(domains);

	            // Writer layered on the zip stream; it is only flushed, never
	            // closed, because the try-with-resources statement owns zos.
	            BufferedWriter bw =
	                new BufferedWriter(new OutputStreamWriter(zos, "UTF-8"));

	            // now output each map entry to its own file,
	            // whose filename is the TLD
	            writeRules(zos, bw, rules);
	        }
	    }

	    /**
	     * Collects the rules from the reader in input order, skipping comment and
	     * blank lines. Surrounding whitespace is dropped and a line is read only up
	     * to its first whitespace, so trailing text never ends up in a rule or in
	     * the zip entry name derived from it.
	     *
	     * @param br reader positioned at the start of the rule list
	     * @return the rules, each tagged ICANN or PRIVATE
	     * @throws IOException if reading fails
	     */
	    private static List<Domain> readDomains(BufferedReader br) throws IOException {
	        List<Domain> domains = new LinkedList<>();
	        byte type = ICANN;
	        String line;
	        while ((line = br.readLine()) != null) {
	            String text = line.trim();
	            if (text.startsWith(COMMENT)) {
	                if (text.startsWith(BEGIN_PRIVATE)) {
	                    type = PRIVATE;
	                }
	                continue;
	            }
	            // text has no leading whitespace here, so element 0 is the rule
	            // itself (or "" when the line was blank).
	            String rule = WHITESPACE.split(text, 2)[0];
	            if (rule.isEmpty()) {
	                continue;
	            }
	            domains.add(new Domain(rule, type));
	        }
	        return domains;
	    }

	    /**
	     * Groups the rules by top-level domain, keeping input order in each group.
	     *
	     * @param domains rules to group
	     * @return map from TLD to the rules that end in that TLD
	     */
	    private static Map<String, List<Domain>> addDomains(List<Domain> domains) {
	        Map<String, List<Domain>> rules = new HashMap<>();
	        for (Domain domain : domains) {
	            rules.computeIfAbsent(getTLD(domain.name), tld -> new LinkedList<>())
	                 .add(domain);
	        }
	        return rules;
	    }

	    /**
	     * Writes one zip entry per TLD, in sorted TLD order. Each rule becomes one
	     * line: a single character whose value is the rule type, then the rule.
	     *
	     * @param zos   zip stream that receives the entries
	     * @param bw    writer layered on top of {@code zos}
	     * @param rules map from TLD to its rules
	     * @throws IOException if writing fails
	     */
	    private static void writeRules(ZipOutputStream zos, BufferedWriter bw,
	                                   Map<String, List<Domain>> rules)
	                                   throws IOException {
	        // Sort keys for deterministic output
	        List<String> tlds = rules.keySet().stream().sorted().collect(Collectors.toList());
	        for (String tld : tlds) {
	            ZipEntry ze = new ZipEntry(tld);
	            // Constant timestamp: the archive does not depend on build time.
	            ze.setLastModifiedTime(FileTime.fromMillis(0));
	            zos.putNextEntry(ze);
	            for (Domain entry : rules.get(tld)) {
	                bw.write(entry.type);
	                bw.write(entry.name, 0, entry.name.length());
	                bw.newLine();
	            }
	            // Push buffered characters into the entry before it is closed.
	            bw.flush();
	            zos.closeEntry();
	        }
	    }

	    /**
	     * Returns the text after the last '.' of the rule, or the whole rule when
	     * it contains no '.'.
	     */
	    private static String getTLD(String line) {
	        int dotIndex = line.lastIndexOf('.');
	        return (dotIndex == -1) ? line : line.substring(dotIndex + 1);
	    }
	}