| #!/usr/bin/env python |
| # Run with directory arguments from any directory, with no special setup required. |
| |
| import ftplib |
| import hashlib |
| import os |
| import re |
| import shutil |
| import string |
| import subprocess |
| import sys |
| import tarfile |
| import tempfile |
| |
| VERBOSE = False |
| |
| def warn(s): |
| sys.stderr.write("warning: %s\n" % s) |
| |
| def warn_verbose(s): |
| if VERBOSE: |
| warn(s) |
| |
| def is_interesting(path): |
| path = path.lower() |
| uninteresting_extensions = [ |
| ".bp", |
| ".map", |
| ".mk", |
| ".py", |
| ".pyc", |
| ".swp", |
| ".txt", |
| ] |
| if os.path.splitext(path)[1] in uninteresting_extensions: |
| return False |
| if path.endswith("/notice") or path.endswith("/readme"): |
| return False |
| return True |
| |
| def is_auto_generated(content): |
| if "Generated by gensyscalls.py" in content or "generated by genserv.py" in content: |
| return True |
| if "This header was automatically generated from a Linux kernel header" in content: |
| return True |
| return False |
| |
| copyrights = set() |
| |
| def extract_copyright_at(lines, i): |
| hash = lines[i].startswith("#") |
| |
| # Do we need to back up to find the start of the copyright header? |
| start = i |
| if not hash: |
| while start > 0: |
| if "/*" in lines[start - 1]: |
| break |
| start -= 1 |
| |
| # Read comment lines until we hit something that terminates a |
| # copyright header. |
| while i < len(lines): |
| if "*/" in lines[i]: |
| break |
| if hash and len(lines[i]) == 0: |
| break |
| if "\t@(#)" in lines[i] or "\tfrom: @(#)" in lines[i] or "From: @(#)" in lines[i] or "from OpenBSD:" in lines[i]: |
| break |
| if "\tcitrus Id: " in lines[i]: |
| break |
| if "\t$Citrus: " in lines[i] or "\t$OpenBSD: " in lines[i] or " $FreeBSD: " in lines[i] or "\t$NetBSD: " in lines[i]: |
| break |
| if "$FreeBSD$" in lines[i] or "$Citrus$" in lines[i]: |
| break |
| # OpenBSD likes to say where stuff originally came from: |
| if "Original version ID:" in lines[i]: |
| break |
| i += 1 |
| |
| end = i |
| |
| # Trim trailing cruft. |
| while end > 0: |
| if lines[end - 1] != " *" and lines[end - 1] != " * ====================================================": |
| break |
| end -= 1 |
| |
| # Remove C/assembler comment formatting, pulling out just the text. |
| clean_lines = [] |
| for line in lines[start:end]: |
| line = line.replace("\t", " ") |
| line = line.replace("/* ", "") |
| line = re.sub("^ \* ", "", line) |
| line = line.replace("** ", "") |
| line = line.replace("# ", "") |
| if "SPDX-License-Identifier:" in line: |
| continue |
| if line.startswith("++Copyright++"): |
| continue |
| line = line.replace("--Copyright--", "") |
| line = line.rstrip() |
| # These come last and take care of "blank" comment lines. |
| if line == "#" or line == " *" or line == "**" or line == "-": |
| line = "" |
| clean_lines.append(line) |
| |
| # Trim blank lines from head and tail. |
| while clean_lines[0] == "": |
| clean_lines = clean_lines[1:] |
| while clean_lines[len(clean_lines) - 1] == "": |
| clean_lines = clean_lines[0:(len(clean_lines) - 1)] |
| |
| copyright = "\n".join(clean_lines) |
| copyrights.add(copyright) |
| |
| return i |
| |
| |
| def do_file(path): |
| with open(path, "r") as the_file: |
| try: |
| content = open(path, "r").read().decode("utf-8") |
| except UnicodeDecodeError: |
| warn("bad UTF-8 in %s" % path) |
| content = open(path, "r").read().decode("iso-8859-1") |
| |
| lines = content.split("\n") |
| |
| if len(lines) <= 4: |
| warn_verbose("ignoring short file %s" % path) |
| return |
| |
| if is_auto_generated(content): |
| warn_verbose("ignoring auto-generated file %s" % path) |
| return |
| |
| if not "Copyright" in content: |
| if "public domain" in content.lower(): |
| warn("ignoring public domain file %s" % path) |
| return |
| warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines))) |
| return |
| |
| # Manually iterate because extract_copyright_at tells us how many lines to skip. |
| i = 0 |
| while i < len(lines): |
| if "Copyright" in lines[i] and not "@(#) Copyright" in lines[i]: |
| i = extract_copyright_at(lines, i) |
| else: |
| i += 1 |
| |
| |
| def do_dir(path): |
| for directory, sub_directories, filenames in os.walk(arg): |
| if ".git" in sub_directories: |
| sub_directories.remove(".git") |
| sub_directories = sorted(sub_directories) |
| |
| for filename in sorted(filenames): |
| path = os.path.join(directory, filename) |
| if is_interesting(path): |
| do_file(path) |
| |
| |
| args = sys.argv[1:] |
| if len(args) == 0: |
| args = [ "." ] |
| |
| for arg in args: |
| if os.path.isdir(arg): |
| do_dir(arg) |
| else: |
| do_file(arg) |
| |
| for copyright in sorted(copyrights): |
| print copyright.encode("utf-8") |
| print |
| print "-------------------------------------------------------------------" |
| print |
| |
| sys.exit(0) |