|  | #!/usr/bin/env python3 | 
|  | # Run with directory arguments from any directory, with no special setup | 
|  | # required. | 
|  |  | 
|  | import os | 
|  | from pathlib import Path | 
|  | import re | 
|  | import sys | 
|  | from typing import Sequence | 
|  |  | 
# When True, warn_verbose() forwards its messages to warn(); when False they
# are dropped.
VERBOSE = False

# Cleaned-up copyright notice texts added by extract_copyright_at(). Being a
# set, identical notices are stored only once.
copyrights = set()
|  |  | 
|  |  | 
def warn(s):
    """Write *s* to standard error as a single "warning: ..." line."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)
|  |  | 
|  |  | 
def warn_verbose(s):
    """Emit *s* through warn(), but only when the VERBOSE flag is set."""
    if not VERBOSE:
        return
    warn(s)
|  |  | 
|  |  | 
def is_interesting(path_str: str) -> bool:
    """Return True if the file at *path_str* should be scanned.

    The path is lower-cased before any comparison, so every check below is
    case-insensitive. A file is skipped when its suffix or its file name is
    on one of the skip lists, or when its name ends in "~".
    """
    lowered = Path(path_str.lower())
    skipped_suffixes = (
        ".bp",
        ".map",
        ".md",
        ".mk",
        ".py",
        ".pyc",
        ".swp",
        ".txt",
    )
    skipped_names = ("notice", "readme", "pylintrc")
    if lowered.suffix in skipped_suffixes or lowered.name in skipped_names:
        return False
    # Names ending in "~" are backup files left behind by some editors.
    return not lowered.match("*~")
|  |  | 
|  |  | 
def is_auto_generated(content):
    """Return True if *content* contains one of the known generator banners.

    The match is a case-sensitive substring test against the whole text.
    """
    banners = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(banner in content for banner in banners)
|  |  | 
|  |  | 
def is_copyright_end(line: str, first_line_was_hash: bool) -> bool:
    """Return True if *line* terminates a copyright header.

    Args:
        line: The line to test.
        first_line_was_hash: True when the header being read began with a
            "#" line; for such headers an empty line also ends the header.

    Returns:
        True when the line is an empty line of a "#"-style header or contains
        any of the known terminator substrings, False otherwise.
    """
    if first_line_was_hash and not line:
        return True

    terminators = (
        " $FreeBSD: ",
        "$Citrus$",
        "$FreeBSD$",
        "*/",
        "From: @(#)",
        # OpenBSD likes to say where stuff originally came from:
        "Original version ID:",
        "\t$Citrus: ",
        "\t$NetBSD: ",
        "\t$OpenBSD: ",
        "\t@(#)",
        "\tcitrus Id: ",
        "\tfrom: @(#)",
        "from OpenBSD:",
    )
    return any(marker in line for marker in terminators)
|  |  | 
|  |  | 
def extract_copyright_at(lines: Sequence[str], i: int) -> int:
    """Extract the copyright notice around ``lines[i]`` into ``copyrights``.

    Args:
        lines: All lines of the file being scanned.
        i: Index of a line containing "Copyright".

    Returns:
        The index at which the caller should resume scanning. This is the
        index of the line that terminated the header (``len(lines)`` if no
        terminator was found), or ``i + 1`` when the copyright line is itself
        the terminator. The result is always greater than ``i``, so a caller
        looping on the return value is guaranteed to make progress.
    """
    first_line_was_hash = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    # Not for "#" headers, and not when the comment opens on the copyright
    # line itself (backing up then would pull in unrelated preceding lines).
    start = i
    if not first_line_was_hash and "/*" not in lines[i]:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    end = i
    while end < len(lines) and not is_copyright_end(lines[end],
                                                    first_line_was_hash):
        end += 1

    # If the copyright line is itself a terminator (for example a one-line
    # "/* Copyright ... */" comment), keep that line and step past it.
    # Returning `i` unchanged here would make the caller loop forever.
    if end == i:
        end = i + 1
    resume = end

    # Trim trailing cruft, never going back past the start of the header.
    trailing_cruft = {
        " *", " * ===================================================="
    }
    while end > start and lines[end - 1] in trailing_cruft:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", "    ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if "SPDX-License-Identifier:" in line:
            continue
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        # A comment closer can only be present when the copyright line was
        # its own terminator; drop it so it does not end up in the notice.
        line = line.replace("*/", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in {"#", " *", "**", "-"}:
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. The emptiness checks matter:
    # every line may have been blank or skipped above.
    while clean_lines and clean_lines[0] == "":
        del clean_lines[0]
    while clean_lines and clean_lines[-1] == "":
        del clean_lines[-1]

    # Do not record an empty notice.
    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    return resume
|  |  | 
|  |  | 
def do_file(path: str) -> None:
    """Scan one file and hand each copyright line to extract_copyright_at().

    The file is decoded as UTF-8; if that fails a warning is issued and the
    bytes are decoded as ISO-8859-1 instead. Files of four lines or fewer,
    files recognised by is_auto_generated(), and files that never mention
    "Copyright" are not scanned further (the last case warns, unless the
    file mentions "public domain", which only warns in verbose mode).
    """
    data = Path(path).read_bytes()
    try:
        content = data.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = data.decode("iso-8859-1")

    lines = content.split("\n")
    line_count = len(lines)

    if line_count <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
        else:
            warn('no copyright notice found in "%s" (%d lines)' %
                 (path, line_count))
        return

    # Manual index loop: extract_copyright_at returns the index at which
    # scanning should continue.
    index = 0
    while index < line_count:
        text = lines[index]
        if "Copyright" not in text or "@(#) Copyright" in text:
            index += 1
        else:
            index = extract_copyright_at(lines, index)
|  |  | 
|  |  | 
def do_dir(arg):
    """Walk the directory tree rooted at *arg* and scan every interesting file.

    Directories named ".git" are not descended into. Sub-directories and
    files are visited in sorted order. Each file path accepted by
    is_interesting() is passed to do_file().
    """
    for directory, sub_directories, filenames in os.walk(arg):
        # os.walk only honours changes made to this list in place. Rebinding
        # the name to a new sorted list would leave the traversal order
        # unchanged, so prune and sort through slice assignment.
        sub_directories[:] = sorted(
            name for name in sub_directories if name != ".git")

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if is_interesting(path):
                do_file(path)
|  |  | 
|  |  | 
def main() -> None:
    """Scan each command-line argument, then print the collected notices.

    With no arguments the current directory (".") is scanned. Directory
    arguments go to do_dir(), anything else to do_file(). The notices in
    ``copyrights`` are then printed in sorted order, each followed by a blank
    line, a line of 67 dashes and another blank line.
    """
    targets = sys.argv[1:] or ["."]

    for target in targets:
        handler = do_dir if os.path.isdir(target) else do_file
        handler(target)

    separator = "-" * 67
    for notice in sorted(copyrights):
        print(notice)
        print()
        print(separator)
        print()
|  |  | 
|  |  | 
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()