| #!/usr/bin/env python3 |
| |
| from enum import Enum |
| from pathlib import Path |
| from typing import Sequence |
| from typing import Tuple |
| import argparse |
| import os |
| import re |
| import sys |
| |
# Files, relative to the target directory, that are skipped entirely while
# collecting notices.
IGNORE_FILE_NAME = [
    # This script itself.
    "generate_notice.py",

    # Stand-alone license texts.
    "LICENSE",
    "LICENSE.TXT",
    "LICENSE_APACHE2.TXT",
    "LICENSE_BSD_3_CLAUSE.TXT",
    "LICENSE_FSFAP.TXT",
    "LICENSE_MIT.TXT",
    "LICENSE_MIT_MODERN_VARIANT.TXT",
    "MODULE_LICENSE_BSD_LIKE",
    "NOTICE",
    "builds/unix/LICENSE_GPLv2_WITH_AUTOCONF_EXCEPTION.TXT",
    "builds/unix/LICENSE_GPLv3_WITH_AUTOCONF_EXCEPTION.TXT",
    "docs/FTL.TXT",
    "docs/GPLv2.TXT",
    "src/gzip/LICENSE_ZLIB.TXT",

    # This patch file carries its copyright line as part of a diff. Stop
    # ignoring it if a Copyright line shows up outside a unified diff line.
    "src/gzip/patches/freetype-zlib.diff",
]
| |
# Files, relative to the target directory, that are expected to contain no
# "Copyright" text at all. Any other file without it is a fatal error, and so
# is an entry of this list that is never matched during the walk.
NO_COPYRIGHT_FILES = [
    # Top-level files.
    ".clang-format",
    ".gitignore",
    ".gitlab-ci.yml",
    ".mailmap",
    "Android.bp",
    "METADATA",
    "OWNERS",
    "README.android",
    "TEST_MAPPING",

    # builds/
    "builds/atari/ATARI.H",
    "builds/atari/FNames.SIC",
    "builds/atari/FREETYPE.PRJ",
    "builds/atari/README.TXT",
    "builds/atari/deflinejoiner.awk",
    "builds/atari/gen-purec-patch.sh",
    "builds/mac/FreeType.m68k_cfm.make.txt",
    "builds/mac/FreeType.m68k_far.make.txt",
    "builds/mac/FreeType.ppc_carbon.make.txt",
    "builds/mac/FreeType.ppc_classic.make.txt",
    "builds/mac/README",
    "builds/mac/ascii2mpw.py",
    "builds/mac/freetype-Info.plist",
    "builds/mac/ftlib.prj.xml",
    "builds/unix/.gitignore",
    "builds/unix/freetype2.in",
    "builds/vms/LIBS.OPT_IA64",
    "builds/vms/_LINK.OPT_IA64",
    "builds/vms/vmslib.dat",
    "builds/wince/vc2005-ce/freetype.sln",
    "builds/wince/vc2005-ce/freetype.vcproj",
    "builds/wince/vc2005-ce/index.html",
    "builds/wince/vc2008-ce/freetype.sln",
    "builds/wince/vc2008-ce/freetype.vcproj",
    "builds/wince/vc2008-ce/index.html",
    "builds/windows/.gitignore",
    "builds/windows/vc2010/freetype.sln",
    "builds/windows/vc2010/freetype.user.props",
    "builds/windows/vc2010/freetype.vcxproj",
    "builds/windows/vc2010/freetype.vcxproj.filters",
    "builds/windows/vc2010/index.html",
    "builds/windows/visualc/freetype.dsp",
    "builds/windows/visualc/freetype.dsw",
    "builds/windows/visualc/freetype.sln",
    "builds/windows/visualc/freetype.vcproj",
    "builds/windows/visualc/index.html",
    "builds/windows/visualce/freetype.dsp",
    "builds/windows/visualce/freetype.dsw",
    "builds/windows/visualce/freetype.vcproj",
    "builds/windows/visualce/index.html",

    # devel-teeui/
    "devel-teeui/OWNERS",
    "devel-teeui/README.md",
    "devel-teeui/ftmodule.h",
    "devel-teeui/rules.json",
    "devel-teeui/rules.mk",

    # docs/
    "docs/.gitignore",
    "docs/CMAKE",
    "docs/INSTALL.MAC",
    "docs/MAKEPP",
    "docs/PROBLEMS",
    "docs/README",
    "docs/freetype-config.1",
    "docs/markdown/images/favico.ico",
    "docs/markdown/javascripts/extra.js",
    "docs/markdown/stylesheets/extra.css",

    # include/, map file and objs/
    "include/freetype/config/ftmodule.h",
    "include/freetype/ftchapters.h",
    "libft2.map.txt",
    "objs/.gitignore",
    "objs/README",

    # src/
    "src/gzip/README.freetype",
    "src/gzip/crc32.h",
    "src/gzip/inffixed.h",
    "src/tools/apinames.c",
    "src/tools/chktrcmp.py",
    "src/tools/cordic.py",
    "src/tools/ftrandom/Makefile",
    "src/tools/ftrandom/README",
    "src/tools/make_distribution_archives.py",
    "src/tools/no-copyright",
    "src/tools/test_afm.c",
    "src/tools/test_bbox.c",
    "src/tools/test_trig.c",
    "src/tools/update-copyright",

    # subprojects/
    "subprojects/harfbuzz.wrap",
    "subprojects/libpng.wrap",
    "subprojects/zlib.wrap",

    # tests/
    "tests/README.md",
    "tests/issue-1063/main.c",
    "tests/meson.build",
    "tests/scripts/download-test-fonts.py",
]
| |
class CommentType(Enum):
    """Comment syntaxes in which a copyright header can be written."""

    # /* ... */
    C_STYLE_BLOCK = 1
    # /* ... */ but uses multiple lines of block comments.
    C_STYLE_BLOCK_AS_LINE = 2
    # // ...
    C_STYLE_LINE = 3
    # # ...
    SCRIPT_STYLE_HASH = 4
    # $! ...
    SCRIPT_STYLE_DOLLER = 5
    # no comment escape
    DOC_STYLE = 6
    UNKNOWN = 10000
| |
| |
def fatal(msg: str):
    """Write msg and a newline to stderr, then exit with status 1."""
    sys.stderr.write(msg + "\n")
    raise SystemExit(1)
| |
| |
def warn(msg: str):
    """Write msg and a newline to stderr without exiting."""
    sys.stderr.write(msg + "\n")
| |
| |
def cleanup_and_join(out_lines: Sequence[str]):
    """Trim and dedent extracted notice lines, then join them with newlines.

    Trailing blank lines are dropped. After that, as long as every line is
    either empty or starts with a space, one leading character is removed
    from each line. The caller's sequence is not modified.

    Args:
      out_lines: the extracted notice, one string per line.

    Returns:
      The cleaned lines joined with a newline.

    Exits through fatal() when nothing but blank lines was passed in.
    """
    # Work on a copy so that trimming never alters the caller's list.
    trimmed = list(out_lines)
    while trimmed and not trimmed[-1].strip():
        trimmed.pop()

    # This check has to precede the dedent loop: with no lines left, all()
    # below is vacuously true and the loop would never terminate.
    if not trimmed:
        fatal("Failed to get copyright info")

    # The last line is non-blank at this point, so the loop ends as soon as
    # that line no longer starts with a space.
    while all(not x or x[0] == ' ' for x in trimmed):
        trimmed = [x[1:] for x in trimmed]

    return "\n".join(trimmed)
| |
| |
def get_comment_type(copyright_line: str, path: str) -> CommentType:
    """Classify the comment syntax surrounding a copyright line.

    Path-based rules are checked first, then the prefix of copyright_line,
    and finally whether the line opens and closes a block comment.
    """
    # vms_make.com contains multiple copyright headers as string constants.
    if path.endswith("/vms_make.com"):
        return CommentType.SCRIPT_STYLE_DOLLER

    if "docs/" in path or "README" in path:
        return CommentType.DOC_STYLE

    prefix_styles = (
        ("#", CommentType.SCRIPT_STYLE_HASH),
        ("//", CommentType.C_STYLE_LINE),
        ("$!", CommentType.SCRIPT_STYLE_DOLLER),
    )
    for prefix, style in prefix_styles:
        if copyright_line.startswith(prefix):
            return style

    opens_and_closes = "/*" in copyright_line and "*/" in copyright_line
    if not opens_and_closes:
        return CommentType.C_STYLE_BLOCK

    # ftrandom.c uses a single-line block comment for the first Copyright
    # line, and the following license notice is wrapped in one block comment.
    # That file can be handled by the C_STYLE_BLOCK parser.
    if path.endswith("src/tools/ftrandom/ftrandom.c"):
        return CommentType.C_STYLE_BLOCK
    return CommentType.C_STYLE_BLOCK_AS_LINE
| |
| |
def extract_copyright_at(lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract the copyright notice that starts at lines[i].

    The comment style is determined with get_comment_type() and the work is
    handed to the extractor for that style.

    Returns:
      The extractor's result: a (notice, next index) tuple. Exits through
      fatal() when no extractor exists for the detected style.
    """
    extractors = {
        CommentType.C_STYLE_BLOCK: extract_from_c_style_block_at,
        CommentType.C_STYLE_BLOCK_AS_LINE: extract_from_c_style_block_as_line_at,
        CommentType.C_STYLE_LINE: extract_from_c_style_lines_at,
        CommentType.SCRIPT_STYLE_HASH: extract_from_script_hash_at,
        CommentType.SCRIPT_STYLE_DOLLER: extract_from_script_doller_at,
        CommentType.DOC_STYLE: extract_from_doc_style_at,
    }

    extractor = extractors.get(get_comment_type(lines[i], path))
    if extractor is not None:
        return extractor(lines, i, path)

    fatal("Uknown comment style: %s" % lines[i])
| |
| |
def extract_from_doc_style_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice from plain text that has no comment markers.

    The notice starts at lines[i], which must begin with "Copyright". It
    runs until the first pair of consecutive empty lines found more than
    four lines after the start, or to the end of the file.

    Args:
      lines: the file content split into lines.
      i: index of the candidate copyright line.
      path: path of the file (unused here).

    Returns:
      (None, i + 1) when lines[i] does not begin with "Copyright",
      otherwise (notice text, index after the line that ended the notice).
    """
    if not lines[i].startswith("Copyright"):
        return (None, i + 1)

    start = i
    total = len(lines)
    while i < total:
        # Treat double spacing as the end of the license header. The bounds
        # test keeps an empty last line from indexing past the end.
        double_blank = (
            lines[i] == "" and i + 1 < total and lines[i + 1] == "")
        if i - start > 4 and double_blank:
            break
        i += 1

    # The loop can only stop more than four lines after start, or at the end
    # of the file, so the slice below always has at least one line.
    return (cleanup_and_join(lines[start:i]), i + 1)
| |
| |
def extract_from_c_style_lines_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written as consecutive "//" comment lines.

    The notice spans lines[i] up to, but not including, the first line that
    does not start with "//".

    Returns:
      (notice text, index after the line that ended the notice). Exits
      through fatal() when lines[i] itself is not a "//" line.
    """
    def uncomment(text: str) -> str:
        # Drop the "// " prefix. A bare "//" becomes an empty line.
        if text.startswith("// "):
            return text[3:]
        if text == "//":
            return ""
        return text

    first = i
    while i < len(lines) and lines[i].startswith("//"):
        i += 1

    if first == i:
        fatal("Failed to get copyright info")

    notice_lines = [uncomment(text) for text in lines[first:i]]
    return (cleanup_and_join(notice_lines), i + 1)
| |
| |
def extract_from_script_hash_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written as "#" comment lines starting at lines[i].

    The notice ends before the first line that contains no "#", or before
    the first of two consecutive bare "#" lines.

    Args:
      lines: the file content split into lines.
      i: index of the candidate copyright line.
      path: path of the file (unused here).

    Returns:
      (None, i + 1) when lines[i] is blank or its first non-blank character
      is not "#", otherwise (notice text, index after the line that ended
      the notice). Exits through fatal() when no line was collected.
    """
    # startswith() also copes with a blank line, where indexing the stripped
    # text would raise IndexError.
    if not lines[i].strip().startswith("#"):
        return (None, i + 1)

    start = i
    total = len(lines)
    while i < total:
        if "#" not in lines[i]:
            break
        # Treat double spacing as the end of the license header. A bare "#"
        # on the very last line has no successor to compare with.
        if lines[i] == "#" and i + 1 < total and lines[i + 1] == "#":
            break
        i += 1
    end = i

    if start == end:
        fatal("Failed to get copyright info")

    out_lines = []
    for line in lines[start:end]:
        if line.startswith("# "):
            out_lines.append(line[2:])
        elif line == "#":
            out_lines.append("")
        else:
            out_lines.append(line)

    return (cleanup_and_join(out_lines), i + 1)
| |
| |
def extract_from_script_doller_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written as "$!" comment lines starting at lines[i].

    The notice ends before the first line that contains no "$!", or before
    the first of two consecutive bare "$!" lines.

    Args:
      lines: the file content split into lines.
      i: index of the candidate copyright line.
      path: path of the file (unused here).

    Returns:
      (None, i + 1) when lines[i] does not start with "$!" after stripping,
      otherwise (notice text, index after the line that ended the notice).
      Exits through fatal() when no line was collected.
    """
    if not lines[i].strip().startswith("$!"):
        return (None, i + 1)

    start = i
    total = len(lines)
    while i < total:
        if "$!" not in lines[i]:
            break
        # Treat double spacing as the end of the license header. A bare "$!"
        # on the very last line has no successor to compare with.
        if lines[i] == "$!" and i + 1 < total and lines[i + 1] == "$!":
            break
        i += 1
    # The terminating line is not part of the notice, matching the "#"
    # extractor. Including it would leak a non-comment line into the notice.
    end = i

    if start == end:
        fatal("Failed to get copyright info")

    out_lines = []
    for line in lines[start:end]:
        if line.startswith("$! "):
            out_lines.append(line[3:])
        elif line == "$!":
            out_lines.append("")
        else:
            out_lines.append(line)

    return (cleanup_and_join(out_lines), i + 1)
| |
| |
def extract_from_c_style_block_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice from a multi-line /* ... */ comment.

    Scanning starts on the line after lines[i] and stops at the first line
    that contains "*/" or one of two known closing phrases, or that is the
    first of two consecutive bare " *" lines or two consecutive empty lines.
    The line on which the scan stops is part of the notice.

    Args:
      lines: the file content split into lines.
      i: index of the copyright line.
      path: path of the file (unused here).

    Returns:
      (notice text, index after the line on which the scan stopped).
    """
    total = len(lines)

    def is_copyright_end(idx: int) -> bool:
        text = lines[idx]
        if "*/" in text:
            return True
        if "understand and accept it fully." in text:
            return True
        if "see copyright notice in zlib.h" in text:
            return True
        # A doubled bare " *" or a doubled empty line ends the header. The
        # bounds test keeps the last line from indexing past the end.
        if text in (" *", ""):
            return idx + 1 < total and lines[idx + 1] == text
        return False

    def strip_markers(text: str) -> str:
        # Strip the beginning "/*" chars.
        if text.startswith("/* "):
            text = text[3:]
        if text == "/*":
            text = ""

        # Strip the ending "*/" chars.
        if text.endswith(" */"):
            text = text[:-3]
        if text.endswith("*/"):
            text = text[:-2]

        # Strip the starting " *" chars. A bare " *" becomes empty even when
        # it was only exposed by the stripping above.
        if text.startswith(" * "):
            text = text[3:]
        if text == " *":
            text = ""

        # Strip trailing spaces.
        return text.rstrip()

    start = i
    i += 1  # include at least one line
    while i < total and not is_copyright_end(i):
        i += 1
    end = i + 1

    out_lines = [strip_markers(line) for line in lines[start:end]]
    return (cleanup_and_join(out_lines), i + 1)
| |
| |
def extract_from_c_style_block_as_line_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice whose lines are each a separate /* ... */ comment.

    Scanning starts on the line after lines[i] and continues while lines
    contain "*/". Lines that start, after stripping, with a run of asterisks
    between two slashes are skipped in the output.

    Args:
      lines: the file content split into lines.
      i: index of the copyright line.
      path: path of the file (unused here).

    Returns:
      (notice text, index after the line on which the scan stopped).
    """
    separator = re.compile(r'/\*+/')

    def strip_markers(text: str) -> str:
        # Strip the beginning "/*" chars.
        if text.startswith("/* "):
            text = text[3:]
        if text == "/*":
            text = ""

        # Strip the ending "*/" chars.
        if text.endswith(" */"):
            text = text[:-3]
        if text.endswith("*/"):
            text = text[:-2]

        # Strip the starting " *" chars. A bare " *" becomes empty even when
        # it was only exposed by the stripping above.
        if text.startswith(" * "):
            text = text[3:]
        if text == " *":
            text = ""

        # Strip trailing spaces.
        return text.rstrip()

    start = i
    i += 1  # include at least one line
    # Every separator line also contains "*/", so one test covers both.
    while i < len(lines) and "*/" in lines[i]:
        i += 1
    # NOTE(review): the line that stopped the scan is included in the slice.
    # A blank one is trimmed by cleanup_and_join(), anything else ends up in
    # the notice. Confirm that this is intended.
    end = i + 1

    out_lines = [
        strip_markers(line)
        for line in lines[start:end]
        if not separator.match(line.strip())
    ]
    return (cleanup_and_join(out_lines), i + 1)
| |
def is_copyright_line(line: str, path: str) -> bool:
    """Return True if line marks the start of a copyright notice in path."""
    if "Copyright" not in line:
        return False

    # To avoid unexpected matches, exclude the quoted Copyright strings:
    # `Copyright' is for src/psaux/psobjs.c, "Copyright" for src/cff/cfftoken.h.
    quoted_forms = ("`Copyright'", "\"Copyright\"")
    if any(quoted in line for quoted in quoted_forms):
        return False

    # A comment in these files contains the word Copyright. Accept only a
    # Copyright string that immediately follows "# ".
    if path.endswith(("src/tools/update-copyright-year",
                      "src/tools/glnames.py")):
        return line.startswith("# Copyright ")

    # An unused string constant contains the word Copyright. Accept only a
    # Copyright string that immediately follows " * ".
    if path.endswith("src/gzip/inftrees.c"):
        return line.startswith(" * Copyright ")

    # The Copyright string also matches the LegalCopyright key in the RC file.
    if path.endswith("src/base/ftver.rc"):
        return "LegalCopyright" not in line

    return True
| |
| |
def do_file(path: str, copyrights: dict, no_copyright_files: set):
    """Collect the copyright notices of one file into copyrights.

    Args:
      path: the file to scan. It is decoded as UTF-8, falling back to
        ISO-8859-1.
      copyrights: mapping from notice text to the list of paths that carry
        the notice. Updated in place.
      no_copyright_files: paths expected to contain no "Copyright" text.
        path is removed from it when the file indeed contains none.

    Exits through fatal() when the file has no "Copyright" text without
    being listed in no_copyright_files, or when it has such text but no
    notice could be extracted.
    """
    raw = Path(path).read_bytes()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        # Every byte sequence decodes as ISO-8859-1, so this cannot fail.
        content = raw.decode("iso-8859-1")

    if "Copyright" not in content:
        if path in no_copyright_files:
            no_copyright_files.remove(path)
        else:
            fatal("%s does not contain Copyright line" % path)
        return

    lines = content.splitlines()
    i = 0
    license_found = False
    while i < len(lines):
        if not is_copyright_line(lines[i], path):
            i += 1
            continue

        # The extractor says where scanning has to resume.
        notice, i = extract_copyright_at(lines, i, path)
        if notice:
            copyrights.setdefault(notice, []).append(path)
            license_found = True

    if not license_found:
        fatal("License header could not found: %s" % path)
| |
def do_check(path, format):
    """Walk path, collect all copyright notices and print them.

    Files listed in IGNORE_FILE_NAME are skipped and every other file is
    handed to do_file(). Entries of IGNORE_FILE_NAME or NO_COPYRIGHT_FILES
    that were not consumed during the walk are fatal. The collected notices
    are printed in the layout selected by format.
    """
    # make sure the path ends with slash
    if not path.endswith('/'):
        path += '/'

    file_to_ignore = {os.path.join(path, name) for name in IGNORE_FILE_NAME}
    no_copyright_files = {
        os.path.join(path, name) for name in NO_COPYRIGHT_FILES}
    copyrights = {}

    for directory, sub_directories, filenames in os.walk(path):
        # skip .git directory
        if ".git" in sub_directories:
            sub_directories.remove(".git")

        for fpath in (os.path.join(directory, fname) for fname in filenames):
            if fpath in file_to_ignore:
                file_to_ignore.remove(fpath)
            else:
                do_file(fpath, copyrights, no_copyright_files)

    if file_to_ignore:
        fatal("Following files are listed in IGNORE_FILE_NAME but doesn't exists,.\n"
              + "\n".join(file_to_ignore))

    if no_copyright_files:
        fatal("Following files are listed in NO_COPYRIGHT_FILES but doesn't exists.\n"
              + "\n".join(no_copyright_files))

    if format == Format.html:
        print_html(copyrights)
    elif format in (Format.notice, Format.notice_with_filename):
        print_notice(copyrights, format == Format.notice_with_filename)
| |
def print_html(copyrights):
    """Print the collected notices as an HTML table on stdout.

    Each table row holds the sorted list of files in its first cell and the
    notice shared by those files in its second cell. Rows are ordered by
    notice text.

    Args:
      copyrights: mapping from notice text to the list of paths that carry
        the notice.
    """
    # Imported here because this is the only function that needs it.
    from html import escape

    print('<html>')
    print("""
  <head>
    <style>
      table {
        font-family: monospace
      }

      table tr td {
        padding: 10px 10px 10px 10px
      }
    </style>
  </head>
  """)
    print('<body>')
    print('<table border="1" style="border-collapse:collapse">')
    for notice in sorted(copyrights.keys()):
        files = sorted(copyrights[notice])

        print('<tr>')
        print('<td>')
        print('<ul>')
        for file in files:
            # Paths come from the file system and may contain markup chars.
            print('<li>%s</li>' % escape(file))
        print('</ul>')
        print('</td>')
        print('<td>')
        # Escape before inserting <br>. Notices often contain text such as
        # "<name@host>" that a browser would otherwise swallow as a tag.
        print('<p>%s</p>' % escape(notice).replace('\n', '<br>'))
        print('</td>')

        print('</tr>')

    print('</table>')
    print('</body></html>')
| |
def print_notice(copyrights, print_file):
    """Print every notice on stdout, each followed by a divider line.

    When print_file is true, the sorted paths that carry a notice are
    printed before it.
    """
    divider = "-" * 67
    # Go through the notices in sorted order for stable output.
    for notice in sorted(copyrights):
        if print_file:
            print(*sorted(copyrights[notice]), sep="\n")
            print()
        print(notice, "", divider, "", sep="\n")
| |
class Format(Enum):
    """Output layouts. Each member's value is its command-line spelling."""

    notice = 'notice'
    notice_with_filename = 'notice_with_filename'
    html = 'html'

    def __str__(self):
        # Render a member as its plain value instead of "Format.<name>".
        return self.value
| |
def main():
    """Parse the command line and run the notice collection."""
    parser = argparse.ArgumentParser(description="Collect notice headers.")
    parser.add_argument(
        "--format",
        dest="format",
        type=Format,
        choices=list(Format),
        default=Format.notice,
        help="print filename before the license notice",
    )
    parser.add_argument(
        "--target",
        dest="target",
        action='store',
        required=True,
        help="target directory to collect notice headers",
    )
    options = parser.parse_args()
    do_check(options.target, options.format)


if __name__ == "__main__":
    main()
| |