blob: 26d3939e4ba163335ffcb07c834bc09290fd15d6 [file] [log] [blame]
#!/usr/bin/env python3
from enum import Enum
from pathlib import Path
from typing import Sequence
from typing import Tuple
import argparse
import os
import re
import sys
# list of specific files to be ignored.
IGNORE_FILE_NAME = [
# Exclude myself
"generate_notice.py",
# License files
"LICENSE",
"LICENSE.TXT",
"LICENSE_APACHE2.TXT",
"LICENSE_BSD_3_CLAUSE.TXT",
"LICENSE_FSFAP.TXT",
"LICENSE_MIT.TXT",
"LICENSE_MIT_MODERN_VARIANT.TXT",
"MODULE_LICENSE_BSD_LIKE",
"NOTICE",
"builds/unix/LICENSE_GPLv2_WITH_AUTOCONF_EXCEPTION.TXT",
"builds/unix/LICENSE_GPLv3_WITH_AUTOCONF_EXCEPTION.TXT",
"docs/FTL.TXT",
"docs/GPLv2.TXT",
"src/gzip/LICENSE_ZLIB.TXT",
# The patch file contains copyright line as a diff. Use it if Copyright is not
# in a unified diff line.
"src/gzip/patches/freetype-zlib.diff",
]
NO_COPYRIGHT_FILES = [
".clang-format",
".gitignore",
".gitlab-ci.yml",
".mailmap",
"Android.bp",
"METADATA",
"OWNERS",
"README.android",
"TEST_MAPPING",
"builds/atari/ATARI.H",
"builds/atari/FNames.SIC",
"builds/atari/FREETYPE.PRJ",
"builds/atari/README.TXT",
"builds/atari/deflinejoiner.awk",
"builds/atari/gen-purec-patch.sh",
"builds/mac/FreeType.m68k_cfm.make.txt",
"builds/mac/FreeType.m68k_far.make.txt",
"builds/mac/FreeType.ppc_carbon.make.txt",
"builds/mac/FreeType.ppc_classic.make.txt",
"builds/mac/README",
"builds/mac/ascii2mpw.py",
"builds/mac/freetype-Info.plist",
"builds/mac/ftlib.prj.xml",
"builds/unix/.gitignore",
"builds/unix/freetype2.in",
"builds/vms/LIBS.OPT_IA64",
"builds/vms/_LINK.OPT_IA64",
"builds/vms/vmslib.dat",
"builds/wince/vc2005-ce/freetype.sln",
"builds/wince/vc2005-ce/freetype.vcproj",
"builds/wince/vc2005-ce/index.html",
"builds/wince/vc2008-ce/freetype.sln",
"builds/wince/vc2008-ce/freetype.vcproj",
"builds/wince/vc2008-ce/index.html",
"builds/windows/.gitignore",
"builds/windows/vc2010/freetype.sln",
"builds/windows/vc2010/freetype.user.props",
"builds/windows/vc2010/freetype.vcxproj",
"builds/windows/vc2010/freetype.vcxproj.filters",
"builds/windows/vc2010/index.html",
"builds/windows/visualc/freetype.dsp",
"builds/windows/visualc/freetype.dsw",
"builds/windows/visualc/freetype.sln",
"builds/windows/visualc/freetype.vcproj",
"builds/windows/visualc/index.html",
"builds/windows/visualce/freetype.dsp",
"builds/windows/visualce/freetype.dsw",
"builds/windows/visualce/freetype.vcproj",
"builds/windows/visualce/index.html",
"devel-teeui/OWNERS",
"devel-teeui/README.md",
"devel-teeui/ftmodule.h",
"devel-teeui/rules.json",
"devel-teeui/rules.mk",
"docs/.gitignore",
"docs/CMAKE",
"docs/INSTALL.MAC",
"docs/MAKEPP",
"docs/PROBLEMS",
"docs/README",
"docs/freetype-config.1",
"docs/markdown/images/favico.ico",
"docs/markdown/javascripts/extra.js",
"docs/markdown/stylesheets/extra.css",
"include/freetype/config/ftmodule.h",
"include/freetype/ftchapters.h",
"libft2.map.txt",
"objs/.gitignore",
"objs/README",
"src/gzip/README.freetype",
"src/gzip/crc32.h",
"src/gzip/inffixed.h",
"src/tools/apinames.c",
"src/tools/chktrcmp.py",
"src/tools/cordic.py",
"src/tools/ftrandom/Makefile",
"src/tools/ftrandom/README",
"src/tools/make_distribution_archives.py",
"src/tools/no-copyright",
"src/tools/test_afm.c",
"src/tools/test_bbox.c",
"src/tools/test_trig.c",
"src/tools/update-copyright",
"subprojects/harfbuzz.wrap",
"subprojects/libpng.wrap",
"subprojects/zlib.wrap",
"tests/README.md",
"tests/issue-1063/main.c",
"tests/meson.build",
"tests/scripts/download-test-fonts.py",
]
class CommentType(Enum):
C_STYLE_BLOCK = 1 # /* ... */
C_STYLE_BLOCK_AS_LINE = 2 # /* ... */ but uses multiple lines of block comments.
C_STYLE_LINE = 3 # // ...
SCRIPT_STYLE_HASH = 4 # # ...
SCRIPT_STYLE_DOLLER = 5 # $! ...
DOC_STYLE = 6 # no comment escape
UNKNOWN = 10000
# Helper function of showing error message and immediate exit.
def fatal(msg: str):
sys.stderr.write(msg)
sys.stderr.write("\n")
sys.exit(1)
def warn(msg: str):
sys.stderr.write(msg)
sys.stderr.write("\n")
def cleanup_and_join(out_lines: Sequence[str]):
while not out_lines[-1].strip():
out_lines.pop(-1)
# If all lines starts from empty space, strip it out.
while all([len(x) == 0 or x[0] == ' ' for x in out_lines]):
out_lines = [x[1:] for x in out_lines]
if not out_lines:
fatal("Failed to get copyright info")
return "\n".join(out_lines)
def get_comment_type(copyright_line: str, path: str) -> CommentType:
# vms_make.com contains multiple copyright header as a string constants.
if path.endswith("/vms_make.com"):
return CommentType.SCRIPT_STYLE_DOLLER
if "docs/" in path or "README" in path:
return CommentType.DOC_STYLE
if copyright_line.startswith("#"):
return CommentType.SCRIPT_STYLE_HASH
if copyright_line.startswith("//"):
return CommentType.C_STYLE_LINE
if copyright_line.startswith("$!"):
return CommentType.SCRIPT_STYLE_DOLLER
if "/*" in copyright_line and "*/" in copyright_line:
# ftrandom.c uses single line block comment for the first Copyright line,
# and following license notice is wrapped with single block comment.
# This file can be handled by C_STYLE_BLOCK parser.
if path.endswith("src/tools/ftrandom/ftrandom.c"):
return CommentType.C_STYLE_BLOCK
else:
return CommentType.C_STYLE_BLOCK_AS_LINE
else:
return CommentType.C_STYLE_BLOCK
# Extract copyright notice and returns next index.
def extract_copyright_at(lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
commentType = get_comment_type(lines[i], path)
if commentType == CommentType.C_STYLE_BLOCK:
return extract_from_c_style_block_at(lines, i, path)
if commentType == CommentType.C_STYLE_BLOCK_AS_LINE:
return extract_from_c_style_block_as_line_at(lines, i, path)
elif commentType == CommentType.C_STYLE_LINE:
return extract_from_c_style_lines_at(lines, i, path)
elif commentType == CommentType.SCRIPT_STYLE_HASH:
return extract_from_script_hash_at(lines, i, path)
elif commentType == CommentType.SCRIPT_STYLE_DOLLER:
return extract_from_script_doller_at(lines, i, path)
elif commentType == CommentType.DOC_STYLE:
return extract_from_doc_style_at(lines, i, path)
else:
fatal("Uknown comment style: %s" % lines[i])
def extract_from_doc_style_at(
lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
if not lines[i].startswith("Copyright"):
return (None, i + 1)
def is_copyright_end(lines: str, start: int, i: int) -> bool:
# treat double spacing as end of license header
if i - start > 4 and lines[i] == "" and lines[i + 1] == "":
return True
return False
start = i
while i < len(lines):
if is_copyright_end(lines, start, i):
break
i += 1
end = i
if start == end:
fatal("Failed to get copyright info")
out_lines = lines[start:end]
return (cleanup_and_join(out_lines), i + 1)
def extract_from_c_style_lines_at(
lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
def is_copyright_end(line):
if line.startswith("//"):
return False
else:
return True
start = i
while i < len(lines):
if is_copyright_end(lines[i]):
break
i += 1
end = i
if start == end:
fatal("Failed to get copyright info")
out_lines = []
for line in lines[start:end]:
if line.startswith("// "):
out_lines.append(line[3:])
elif line == "//":
out_lines.append(line[2:])
else:
out_lines.append(line)
return (cleanup_and_join(out_lines), i + 1)
def extract_from_script_hash_at(
lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
if lines[i].strip()[0] != "#":
return (None, i + 1)
def is_copyright_end(lines: str, i: int) -> bool:
if "#" not in lines[i]:
return True
# treat double spacing as end of license header
if lines[i] == "#" and lines[i+1] == "#":
return True
return False
start = i
while i < len(lines):
if is_copyright_end(lines, i):
break
i += 1
end = i
if start == end:
fatal("Failed to get copyright info")
out_lines = []
for line in lines[start:end]:
if line.startswith("# "):
out_lines.append(line[2:])
elif line == "#":
out_lines.append(line[1:])
else:
out_lines.append(line)
return (cleanup_and_join(out_lines), i + 1)
def extract_from_script_doller_at(
lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
if not lines[i].strip().startswith("$!"):
return (None, i + 1)
def is_copyright_end(lines: str, i: int) -> bool:
if "$!" not in lines[i]:
return True
# treat double spacing as end of license header
if lines[i] == "$!" and lines[i+1] == "$!":
return True
return False
start = i
while i < len(lines):
if is_copyright_end(lines, i):
break
i += 1
end = i + 1
if start == end:
fatal("Failed to get copyright info")
out_lines = []
for line in lines[start:end]:
if line.startswith("$! "):
out_lines.append(line[3:])
elif line == "$!":
out_lines.append(line[2:])
else:
out_lines.append(line)
return (cleanup_and_join(out_lines), i + 1)
def extract_from_c_style_block_at(
lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
def is_copyright_end(lines: str, i: int) -> bool:
if "*/" in lines[i]:
return True
if "understand and accept it fully." in lines[i]:
return True
if "see copyright notice in zlib.h" in lines[i]:
return True
if lines[i] == " *" and lines[i + 1] == " *":
return True
if lines[i] == "" and lines[i + 1] == "":
return True
return False
start = i
i += 1 # include at least one line
while i < len(lines):
if is_copyright_end(lines, i):
break
i += 1
end = i + 1
out_lines = []
for line in lines[start:end]:
clean_line = line
# Strip begining "/*" chars
if clean_line.startswith("/* "):
clean_line = clean_line[3:]
if clean_line == "/*":
clean_line = clean_line[2:]
# Strip ending "*/" chars
if clean_line.endswith(" */"):
clean_line = clean_line[:-3]
if clean_line.endswith("*/"):
clean_line = clean_line[:-2]
# Strip starting " *" chars
if clean_line.startswith(" * "):
clean_line = clean_line[3:]
if clean_line == " *":
clean_line = line[2:]
# Strip trailing spaces
clean_line = clean_line.rstrip()
out_lines.append(clean_line)
return (cleanup_and_join(out_lines), i + 1)
def extract_from_c_style_block_as_line_at(
lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
def is_copyright_end(line: str) -> bool:
if "*/" in line:
return False
if re.match(r'/\*+/', line.strip()):
return False
return True
start = i
i += 1 # include at least one line
while i < len(lines):
if is_copyright_end(lines[i]):
break
i += 1
end = i + 1
out_lines = []
for line in lines[start:end]:
clean_line = line
if re.match(r'/\*+/', line.strip()):
continue
# Strip begining "/*" chars
if clean_line.startswith("/* "):
clean_line = clean_line[3:]
if clean_line == "/*":
clean_line = clean_line[2:]
# Strip ending "*/" chars
if clean_line.endswith(" */"):
clean_line = clean_line[:-3]
if clean_line.endswith("*/"):
clean_line = clean_line[:-2]
# Strip starting " *" chars
if clean_line.startswith(" * "):
clean_line = clean_line[3:]
if clean_line == " *":
clean_line = line[2:]
# Strip trailing spaces
clean_line = clean_line.rstrip()
out_lines.append(clean_line)
return (cleanup_and_join(out_lines), i + 1)
# Returns true if the line shows the start of copyright notice.
def is_copyright_line(line: str, path: str) -> bool:
if "Copyright" not in line:
return False
# For avoiding unexpected mismatches, exclude quoted Copyright string.
if "`Copyright'" in line: # For src/psaux/psobjs.c
return False
if "\"Copyright\"" in line: # For src/cff/cfftoken.h
return False
if (path.endswith("src/tools/update-copyright-year") or
path.endswith("src/tools/glnames.py")):
# The comment contains string of Copyright. Use only immediate Copyright
# string followed by "# ".
return line.startswith("# Copyright ")
if path.endswith("src/gzip/inftrees.c"):
# The unused string constant contains word of Copyright. Use only immediate
# Copyright string followed by " * ".
return line.startswith(" * Copyright ")
if path.endswith("src/base/ftver.rc"):
# Copyright string matches with LegalCopyright key in the RC file.
return not "LegalCopyright" in line
return True
# Extract the copyright notice and put it into copyrights arg.
def do_file(path: str, copyrights: set, no_copyright_files: set):
raw = Path(path).read_bytes()
try:
content = raw.decode("utf-8")
except UnicodeDecodeError:
content = raw.decode("iso-8859-1")
lines = content.splitlines()
if not "Copyright" in content:
if path in no_copyright_files:
no_copyright_files.remove(path)
else:
fatal("%s does not contain Copyright line" % path)
return
i = 0
license_found = False
while i < len(lines):
if is_copyright_line(lines[i], path):
(notice, nexti) = extract_copyright_at(lines, i, path)
if notice:
if not notice in copyrights:
copyrights[notice] = []
copyrights[notice].append(path)
license_found = True
i = nexti
else:
i += 1
if not license_found:
fatal("License header could not found: %s" % path)
def do_check(path, format):
if not path.endswith('/'): # make sure the path ends with slash
path = path + '/'
file_to_ignore = set([os.path.join(path, x) for x in IGNORE_FILE_NAME])
no_copyright_files = set([os.path.join(path, x) for x in NO_COPYRIGHT_FILES])
copyrights = {}
for directory, sub_directories, filenames in os.walk(path):
# skip .git directory
if ".git" in sub_directories:
sub_directories.remove(".git")
for fname in filenames:
fpath = os.path.join(directory, fname)
if fpath in file_to_ignore:
file_to_ignore.remove(fpath)
continue
do_file(fpath, copyrights, no_copyright_files)
if len(file_to_ignore) != 0:
fatal("Following files are listed in IGNORE_FILE_NAME but doesn't exists,.\n"
+ "\n".join(file_to_ignore))
if len(no_copyright_files) != 0:
fatal("Following files are listed in NO_COPYRIGHT_FILES but doesn't exists.\n"
+ "\n".join(no_copyright_files))
if format == Format.notice:
print_notice(copyrights, False)
elif format == Format.notice_with_filename:
print_notice(copyrights, True)
elif format == Format.html:
print_html(copyrights)
def print_html(copyrights):
print('<html>')
print("""
<head>
<style>
table {
font-family: monospace
}
table tr td {
padding: 10px 10px 10px 10px
}
</style>
</head>
""")
print('<body>')
print('<table border="1" style="border-collapse:collapse">')
for notice in sorted(copyrights.keys()):
files = sorted(copyrights[notice])
print('<tr>')
print('<td>')
print('<ul>')
for file in files:
print('<li>%s</li>' % file)
print('</ul>')
print('</td>')
print('<td>')
print('<p>%s</p>' % notice.replace('\n', '<br>'))
print('</td>')
print('</tr>')
print('</table>')
print('</body></html>')
def print_notice(copyrights, print_file):
# print the copyright in sorted order for stable output.
for notice in sorted(copyrights.keys()):
if print_file:
files = sorted(copyrights[notice])
print("\n".join(files))
print()
print(notice)
print()
print("-" * 67)
print()
class Format(Enum):
notice = 'notice'
notice_with_filename = 'notice_with_filename'
html = 'html'
def __str__(self):
return self.value
def main():
parser = argparse.ArgumentParser(description="Collect notice headers.")
parser.add_argument("--format", dest="format", type=Format, choices=list(Format),
default=Format.notice, help="print filename before the license notice")
parser.add_argument("--target", dest="target", action='store',
required=True, help="target directory to collect notice headers")
res = parser.parse_args()
do_check(res.target, res.format)
if __name__ == "__main__":
main()