Allow passing filenames to generate-NOTICE.py.

For the libandroid_support NOTICE file, we need to combine all the files
in that directory, plus the specific files pulled from bionic.

Also cleaned up some of the Python style.

Bug: N/A
Test: used for libandroid_support
Change-Id: If433e3a0f0478f06d99a9b3556e99dde06a7e5e1
diff --git a/libc/NOTICE b/libc/NOTICE
index bcc9691..eba25ab 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -862,22 +862,6 @@
 
      http://www.apache.org/licenses/LICENSE-2.0
 
- Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-
--------------------------------------------------------------------
-
-Copyright (C) 2015 The Android Open Source Project
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
diff --git a/libc/tools/generate-NOTICE.py b/libc/tools/generate-NOTICE.py
index 6573644..d40891c 100755
--- a/libc/tools/generate-NOTICE.py
+++ b/libc/tools/generate-NOTICE.py
@@ -14,17 +14,33 @@
 import tarfile
 import tempfile
 
-def IsUninteresting(path):
-    path = path.lower()
-    if path.endswith(".mk") or path.endswith(".py") or path.endswith(".pyc") or path.endswith(".txt") or path.endswith(".3") or path.endswith(".swp"):
-        return True
-    if path.endswith("/notice") or path.endswith("/readme") or path.endswith("/caveats"):
-        return True
-    if path.endswith("/tzdata") or path.endswith("/zoneinfo/generate"):
-        return True
-    return False
+VERBOSE = False
 
-def IsAutoGenerated(content):
+def warn(s):
+    sys.stderr.write("warning: %s\n" % s)
+
+def warn_verbose(s):
+    if VERBOSE:
+        warn(s)
+
+def is_interesting(path):
+    path = path.lower()
+    uninteresting_extensions = [
+        ".bp",
+        ".map",
+        ".mk",
+        ".py",
+        ".pyc",
+        ".swp",
+        ".txt",
+    ]
+    if os.path.splitext(path)[1] in uninteresting_extensions:
+        return False
+    if path.endswith("/notice") or path.endswith("/readme"):
+        return False
+    return True
+
+def is_auto_generated(content):
     if "Generated by gensyscalls.py" in content or "generated by genserv.py" in content:
         return True
     if "This header was automatically generated from a Linux kernel header" in content:
@@ -33,7 +49,7 @@
 
 copyrights = set()
 
-def ExtractCopyrightAt(lines, i):
+def extract_copyright_at(lines, i):
     hash = lines[i].startswith("#")
 
     # Do we need to back up to find the start of the copyright header?
@@ -100,13 +116,42 @@
 
     return i
 
-args = sys.argv[1:]
-if len(args) == 0:
-    args = [ "." ]
 
-for arg in args:
-    sys.stderr.write('Searching for source files in "%s"...\n' % arg)
+def do_file(path):
+    with open(path, "r") as the_file:
+        try:
+            content = open(path, "r").read().decode("utf-8")
+        except UnicodeDecodeError:
+            warn("bad UTF-8 in %s" % path)
+            content = open(path, "r").read().decode("iso-8859-1")
 
+    lines = content.split("\n")
+
+    if len(lines) <= 4:
+        warn_verbose("ignoring short file %s" % path)
+        return
+
+    if is_auto_generated(content):
+        warn_verbose("ignoring auto-generated file %s" % path)
+        return
+
+    if not "Copyright" in content:
+        if "public domain" in content.lower():
+            warn("ignoring public domain file %s" % path)
+            return
+        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
+        return
+
+    # Manually iterate because extract_copyright_at tells us how many lines to skip.
+    i = 0
+    while i < len(lines):
+        if "Copyright" in lines[i] and not "@(#) Copyright" in lines[i]:
+            i = extract_copyright_at(lines, i)
+        else:
+            i += 1
+
+
+def do_dir(path):
     for directory, sub_directories, filenames in os.walk(arg):
         if ".git" in sub_directories:
             sub_directories.remove(".git")
@@ -114,45 +159,24 @@
 
         for filename in sorted(filenames):
             path = os.path.join(directory, filename)
-            if IsUninteresting(path):
-                #print "ignoring uninteresting file %s" % path
-                continue
+            if is_interesting(path):
+                do_file(path)
 
-            try:
-                content = open(path, 'r').read().decode('utf-8')
-            except:
-                sys.stderr.write('warning: bad UTF-8 in %s\n' % path)
-                content = open(path, 'r').read().decode('iso-8859-1')
 
-            lines = content.split("\n")
+args = sys.argv[1:]
+if len(args) == 0:
+    args = [ "." ]
 
-            if len(lines) <= 4:
-                #print "ignoring short file %s" % path
-                continue
-
-            if IsAutoGenerated(content):
-                #print "ignoring auto-generated file %s" % path
-                continue
-
-            if not "Copyright" in content:
-                if "public domain" in content.lower():
-                    #print "ignoring public domain file %s" % path
-                    continue
-                sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))
-                continue
-
-            i = 0
-            while i < len(lines):
-                if "Copyright" in lines[i] and not "@(#) Copyright" in lines[i]:
-                    i = ExtractCopyrightAt(lines, i)
-                i += 1
-
-            #print path
+for arg in args:
+    if os.path.isdir(arg):
+        do_dir(arg)
+    else:
+        do_file(arg)
 
 for copyright in sorted(copyrights):
-    print copyright.encode('utf-8')
+    print copyright.encode("utf-8")
     print
-    print '-------------------------------------------------------------------'
+    print "-------------------------------------------------------------------"
     print
 
 sys.exit(0)