libc/tools/generate-NOTICE.py - platform/bionic - Git at Google

 #!/usr/bin/env python
 # Run with directory arguments from any directory, with no special setup required.

 import ftplib
 import hashlib
 import os
 import re
 import shutil
 import string
 import subprocess
 import sys
 import tarfile
 import tempfile

 # When True, warn_verbose() forwards its messages to warn(); otherwise they
 # are dropped.
 VERBOSE = False

 def warn(s):
     """Write *s* to stderr as one line prefixed with "warning: "."""
     message = "warning: %s\n" % s
     sys.stderr.write(message)

 def warn_verbose(s):
     """Pass *s* on to warn(), but only when the VERBOSE flag is set."""
     if not VERBOSE:
         return
     warn(s)

 def is_interesting(path):
     """Return False for paths that are skipped, True for everything else.

     The check is case-insensitive.  A path is skipped when its extension is
     one of the listed build/script/editor/text extensions, or when it ends
     in "/notice" or "/readme" (i.e. a file of that exact name inside some
     directory).
     """
     lowered = path.lower()
     skipped_extensions = (".bp", ".map", ".mk", ".py", ".pyc", ".swp", ".txt")
     extension = os.path.splitext(lowered)[1]
     if extension in skipped_extensions:
         return False
     return not lowered.endswith(("/notice", "/readme"))

 def is_auto_generated(content):
     """Return True if *content* contains one of the known generator banners."""
     banners = (
         "Generated by gensyscalls.py",
         "generated by genserv.py",
         "This header was automatically generated from a Linux kernel header",
     )
     return any(banner in content for banner in banners)

 # Every distinct copyright text found so far.  extract_copyright_at() adds
 # to this set, and the end of the script prints its contents in sorted order.
 copyrights = set()

 # Substrings that end a copyright header: the C comment terminator and the
 # version-control/provenance lines found below BSD-style headers.
 _COPYRIGHT_TERMINATORS = (
     "*/",
     "\t@(#)",
     "\tfrom: @(#)",
     "From: @(#)",
     "from OpenBSD:",
     "\tcitrus Id: ",
     "\t$Citrus: ",
     "\t$OpenBSD: ",
     " $FreeBSD: ",
     "\t$NetBSD: ",
     "$FreeBSD$",
     "$Citrus$",
     # OpenBSD likes to say where stuff originally came from:
     "Original version ID:",
 )

 # Decoration-only lines that are dropped from the end of a header.
 _TRAILING_CRUFT = (
     " *",
     " * ====================================================",
 )


 def _ends_copyright_header(line, hash_comment):
     """Return True if *line* terminates the header currently being read."""
     if hash_comment and len(line) == 0:
         return True
     return any(marker in line for marker in _COPYRIGHT_TERMINATORS)


 def _clean_comment_line(line):
     """Strip comment decoration from *line*; return None to drop the line."""
     line = line.replace("\t", "    ")
     line = line.replace("/* ", "")
     line = re.sub(r"^ \* ", "", line)
     line = line.replace("** ", "")
     line = line.replace("# ", "")
     if "SPDX-License-Identifier:" in line:
         return None
     if line.startswith("++Copyright++"):
         return None
     line = line.replace("--Copyright--", "")
     line = line.rstrip()
     # These come last and take care of "blank" comment lines.
     if line in ("#", " *", "**", "-"):
         line = ""
     return line


 def extract_copyright_at(lines, i):
     """Add the copyright header around ``lines[i]`` to ``copyrights``.

     *lines* is a file's content split into lines and *i* is the index of a
     line that mentions "Copyright".  If that line starts with "#" the
     header begins at line *i*.  Otherwise it begins on line *i* when that
     line contains "/*", or else just after the nearest earlier line that
     contains "/*".  The header runs until a terminator: "*/", one of the
     version-control ID markers, or (for "#" comments) an empty line.  A
     comment that closes with "*/" on line *i* itself contributes the text
     before the "*/".

     Comment decoration is stripped, surrounding blank lines are trimmed and
     the remaining text, if any, is added to the module-level ``copyrights``
     set.

     Returns the index at which the caller should resume scanning.  This is
     the index of the terminating line (or ``len(lines)``) and is always
     greater than *i*, so a caller looping on the result cannot stall.
     """
     first = i
     hash_comment = lines[first].startswith("#")

     # Do we need to back up to find the start of the copyright header?
     # Not for "#" comments, and not when the comment opens on this line.
     start = first
     if not hash_comment and "/*" not in lines[first]:
         while start > 0 and "/*" not in lines[start - 1]:
             start -= 1

     # Read comment lines until we hit something that terminates a
     # copyright header.
     while i < len(lines) and not _ends_copyright_header(lines[i], hash_comment):
         i += 1

     if i == first and "*/" in lines[first]:
         # The comment closes on the copyright line itself: keep the text in
         # front of the terminator instead of discarding the whole line.
         header = list(lines[start:first]) + [lines[first].split("*/")[0]]
     else:
         # Trim trailing cruft.
         end = i
         while end > start and lines[end - 1] in _TRAILING_CRUFT:
             end -= 1
         header = lines[start:end]

     # Remove C/assembler comment formatting, pulling out just the text.
     clean_lines = []
     for raw_line in header:
         cleaned = _clean_comment_line(raw_line)
         if cleaned is not None:
             clean_lines.append(cleaned)

     # Trim blank lines from head and tail; the list may end up empty.
     while clean_lines and clean_lines[0] == "":
         del clean_lines[0]
     while clean_lines and clean_lines[-1] == "":
         del clean_lines[-1]

     if clean_lines:
         copyrights.add("\n".join(clean_lines))

     # Always move past the line we were called on.
     return max(i, first + 1)


 def do_file(path):
     """Collect every copyright header found in the file at *path*.

     The file is decoded as UTF-8, falling back to ISO-8859-1 (with a
     warning) when it is not valid UTF-8.  Nothing is collected from files
     that split into four lines or fewer, that is_auto_generated() flags, or
     that do not contain the word "Copyright"; each of those cases is
     reported through warn() or warn_verbose().  Otherwise every line that
     mentions "Copyright" (but not "@(#) Copyright") is handed to
     extract_copyright_at().
     """
     # Read the raw bytes once and close the file before decoding; decoding
     # bytes explicitly works the same on Python 2 and Python 3.
     with open(path, "rb") as the_file:
         raw = the_file.read()
     try:
         content = raw.decode("utf-8")
     except UnicodeDecodeError:
         warn("bad UTF-8 in %s" % path)
         content = raw.decode("iso-8859-1")

     lines = content.split("\n")

     if len(lines) <= 4:
         warn_verbose("ignoring short file %s" % path)
         return

     if is_auto_generated(content):
         warn_verbose("ignoring auto-generated file %s" % path)
         return

     if "Copyright" not in content:
         if "public domain" in content.lower():
             warn("ignoring public domain file %s" % path)
             return
         warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
         return

     # Manually iterate because extract_copyright_at tells us how many lines to skip.
     i = 0
     while i < len(lines):
         if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
             # Guarantee progress even if the extractor hands back the same
             # index, which would otherwise loop forever on this line.
             i = max(extract_copyright_at(lines, i), i + 1)
         else:
             i += 1


 def do_dir(path):
     """Run do_file() on every interesting file beneath directory *path*.

     ".git" directories are not descended into.  Sub-directories and files
     are visited in sorted order so that warnings come out in a stable order.
     """
     for directory, sub_directories, filenames in os.walk(path):
         if ".git" in sub_directories:
             sub_directories.remove(".git")
         # Sort in place: os.walk only honours changes made to the list it
         # handed us, not a rebinding of the local name.
         sub_directories.sort()

         for filename in sorted(filenames):
             file_path = os.path.join(directory, filename)
             if is_interesting(file_path):
                 do_file(file_path)


 # Script body: scan each command-line argument (default: the current
 # directory), then print every collected copyright text in sorted order.
 args = sys.argv[1:]
 if not args:
     args = ["."]

 # The loop variable stays named `arg`: it is a module-level global, and
 # renaming it would break any function that reads it.
 for arg in args:
     if os.path.isdir(arg):
         do_dir(arg)
     else:
         do_file(arg)

 # Write UTF-8 bytes directly.  On Python 3 that means the underlying binary
 # stream; on Python 2 sys.stdout already accepts byte strings.  The output
 # is byte-for-byte what the former print statements produced.
 out = getattr(sys.stdout, "buffer", sys.stdout)
 separator = b"-------------------------------------------------------------------"
 for text in sorted(copyrights):
     out.write(text.encode("utf-8") + b"\n\n" + separator + b"\n\n")

 sys.exit(0)
	#!/usr/bin/env python
	# Run with directory arguments from any directory, with no special setup required.

	import ftplib
	import hashlib
	import os
	import re
	import shutil
	import string
	import subprocess
	import sys
	import tarfile
	import tempfile

	# When True, warn_verbose() forwards its messages to warn(); otherwise they
	# are dropped.
	VERBOSE = False

	def warn(s):
	    """Write *s* to stderr as one line prefixed with "warning: "."""
	    message = "warning: %s\n" % s
	    sys.stderr.write(message)

	def warn_verbose(s):
	    """Pass *s* on to warn(), but only when the VERBOSE flag is set."""
	    if not VERBOSE:
	        return
	    warn(s)

	def is_interesting(path):
	    """Return False for paths that are skipped, True for everything else.

	    The check is case-insensitive.  A path is skipped when its extension is
	    one of the listed build/script/editor/text extensions, or when it ends
	    in "/notice" or "/readme" (i.e. a file of that exact name inside some
	    directory).
	    """
	    lowered = path.lower()
	    skipped_extensions = (".bp", ".map", ".mk", ".py", ".pyc", ".swp", ".txt")
	    extension = os.path.splitext(lowered)[1]
	    if extension in skipped_extensions:
	        return False
	    return not lowered.endswith(("/notice", "/readme"))

	def is_auto_generated(content):
	    """Return True if *content* contains one of the known generator banners."""
	    banners = (
	        "Generated by gensyscalls.py",
	        "generated by genserv.py",
	        "This header was automatically generated from a Linux kernel header",
	    )
	    return any(banner in content for banner in banners)

	# Every distinct copyright text found so far.  extract_copyright_at() adds
	# to this set, and the end of the script prints its contents in sorted order.
	copyrights = set()

	# Substrings that end a copyright header: the C comment terminator and the
	# version-control/provenance lines found below BSD-style headers.
	_COPYRIGHT_TERMINATORS = (
	    "*/",
	    "\t@(#)",
	    "\tfrom: @(#)",
	    "From: @(#)",
	    "from OpenBSD:",
	    "\tcitrus Id: ",
	    "\t$Citrus: ",
	    "\t$OpenBSD: ",
	    " $FreeBSD: ",
	    "\t$NetBSD: ",
	    "$FreeBSD$",
	    "$Citrus$",
	    # OpenBSD likes to say where stuff originally came from:
	    "Original version ID:",
	)

	# Decoration-only lines that are dropped from the end of a header.
	_TRAILING_CRUFT = (
	    " *",
	    " * ====================================================",
	)


	def _ends_copyright_header(line, hash_comment):
	    """Return True if *line* terminates the header currently being read."""
	    if hash_comment and len(line) == 0:
	        return True
	    return any(marker in line for marker in _COPYRIGHT_TERMINATORS)


	def _clean_comment_line(line):
	    """Strip comment decoration from *line*; return None to drop the line."""
	    line = line.replace("\t", "    ")
	    line = line.replace("/* ", "")
	    line = re.sub(r"^ \* ", "", line)
	    line = line.replace("** ", "")
	    line = line.replace("# ", "")
	    if "SPDX-License-Identifier:" in line:
	        return None
	    if line.startswith("++Copyright++"):
	        return None
	    line = line.replace("--Copyright--", "")
	    line = line.rstrip()
	    # These come last and take care of "blank" comment lines.
	    if line in ("#", " *", "**", "-"):
	        line = ""
	    return line


	def extract_copyright_at(lines, i):
	    """Add the copyright header around ``lines[i]`` to ``copyrights``.

	    *lines* is a file's content split into lines and *i* is the index of a
	    line that mentions "Copyright".  If that line starts with "#" the
	    header begins at line *i*.  Otherwise it begins on line *i* when that
	    line contains "/*", or else just after the nearest earlier line that
	    contains "/*".  The header runs until a terminator: "*/", one of the
	    version-control ID markers, or (for "#" comments) an empty line.  A
	    comment that closes with "*/" on line *i* itself contributes the text
	    before the "*/".

	    Comment decoration is stripped, surrounding blank lines are trimmed and
	    the remaining text, if any, is added to the module-level ``copyrights``
	    set.

	    Returns the index at which the caller should resume scanning.  This is
	    the index of the terminating line (or ``len(lines)``) and is always
	    greater than *i*, so a caller looping on the result cannot stall.
	    """
	    first = i
	    hash_comment = lines[first].startswith("#")

	    # Do we need to back up to find the start of the copyright header?
	    # Not for "#" comments, and not when the comment opens on this line.
	    start = first
	    if not hash_comment and "/*" not in lines[first]:
	        while start > 0 and "/*" not in lines[start - 1]:
	            start -= 1

	    # Read comment lines until we hit something that terminates a
	    # copyright header.
	    while i < len(lines) and not _ends_copyright_header(lines[i], hash_comment):
	        i += 1

	    if i == first and "*/" in lines[first]:
	        # The comment closes on the copyright line itself: keep the text in
	        # front of the terminator instead of discarding the whole line.
	        header = list(lines[start:first]) + [lines[first].split("*/")[0]]
	    else:
	        # Trim trailing cruft.
	        end = i
	        while end > start and lines[end - 1] in _TRAILING_CRUFT:
	            end -= 1
	        header = lines[start:end]

	    # Remove C/assembler comment formatting, pulling out just the text.
	    clean_lines = []
	    for raw_line in header:
	        cleaned = _clean_comment_line(raw_line)
	        if cleaned is not None:
	            clean_lines.append(cleaned)

	    # Trim blank lines from head and tail; the list may end up empty.
	    while clean_lines and clean_lines[0] == "":
	        del clean_lines[0]
	    while clean_lines and clean_lines[-1] == "":
	        del clean_lines[-1]

	    if clean_lines:
	        copyrights.add("\n".join(clean_lines))

	    # Always move past the line we were called on.
	    return max(i, first + 1)


	def do_file(path):
	    """Collect every copyright header found in the file at *path*.

	    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a
	    warning) when it is not valid UTF-8.  Nothing is collected from files
	    that split into four lines or fewer, that is_auto_generated() flags, or
	    that do not contain the word "Copyright"; each of those cases is
	    reported through warn() or warn_verbose().  Otherwise every line that
	    mentions "Copyright" (but not "@(#) Copyright") is handed to
	    extract_copyright_at().
	    """
	    # Read the raw bytes once and close the file before decoding; decoding
	    # bytes explicitly works the same on Python 2 and Python 3.
	    with open(path, "rb") as the_file:
	        raw = the_file.read()
	    try:
	        content = raw.decode("utf-8")
	    except UnicodeDecodeError:
	        warn("bad UTF-8 in %s" % path)
	        content = raw.decode("iso-8859-1")

	    lines = content.split("\n")

	    if len(lines) <= 4:
	        warn_verbose("ignoring short file %s" % path)
	        return

	    if is_auto_generated(content):
	        warn_verbose("ignoring auto-generated file %s" % path)
	        return

	    if "Copyright" not in content:
	        if "public domain" in content.lower():
	            warn("ignoring public domain file %s" % path)
	            return
	        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
	        return

	    # Manually iterate because extract_copyright_at tells us how many lines to skip.
	    i = 0
	    while i < len(lines):
	        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
	            # Guarantee progress even if the extractor hands back the same
	            # index, which would otherwise loop forever on this line.
	            i = max(extract_copyright_at(lines, i), i + 1)
	        else:
	            i += 1


	def do_dir(path):
	    """Run do_file() on every interesting file beneath directory *path*.

	    ".git" directories are not descended into.  Sub-directories and files
	    are visited in sorted order so that warnings come out in a stable order.
	    """
	    for directory, sub_directories, filenames in os.walk(path):
	        if ".git" in sub_directories:
	            sub_directories.remove(".git")
	        # Sort in place: os.walk only honours changes made to the list it
	        # handed us, not a rebinding of the local name.
	        sub_directories.sort()

	        for filename in sorted(filenames):
	            file_path = os.path.join(directory, filename)
	            if is_interesting(file_path):
	                do_file(file_path)


	# Script body: scan each command-line argument (default: the current
	# directory), then print every collected copyright text in sorted order.
	args = sys.argv[1:]
	if not args:
	    args = ["."]

	# The loop variable stays named `arg`: it is a module-level global, and
	# renaming it would break any function that reads it.
	for arg in args:
	    if os.path.isdir(arg):
	        do_dir(arg)
	    else:
	        do_file(arg)

	# Write UTF-8 bytes directly.  On Python 3 that means the underlying binary
	# stream; on Python 2 sys.stdout already accepts byte strings.
	out = getattr(sys.stdout, "buffer", sys.stdout)
	separator = b"-------------------------------------------------------------------"
	for text in sorted(copyrights):
	    out.write(text.encode("utf-8") + b"\n\n" + separator + b"\n\n")

	sys.exit(0)