| #!/usr/bin/python |
| |
| import os |
| import re |
| import sys |
| |
# Matches an inline Markdown link ``[text](target)`` and captures ``target``.
# Neither the text nor the target may contain its own bracket/parenthesis
# characters. The pattern is not anchored, so the ``[alt](src)`` part of an
# image ``![alt](src)`` matches as well.
# Raw strings keep every backslash literal; the compiled pattern is unchanged.
link_re = re.compile(
    r'\['          # opening bracket of the link text
    r'[^\[\]]+'    # link text: one or more non-bracket characters
    r'\]'          # closing bracket of the link text
    r'\('          # opening parenthesis of the target
    r'([^\(\)]+)'  # captured target: one or more non-parenthesis characters
    r'\)'          # closing parenthesis of the target
)
| |
# Require a root directory plus at least one document to scan.
if len(sys.argv) < 3:
    # Parenthesized single-argument print behaves the same on Python 2 and 3.
    print('Usage: <root_dir> <doc_files>...')
    sys.exit(1)

# Directory that link targets are resolved against.
root = sys.argv[1]
# Documents to scan for links.
docs = sys.argv[2:]

# (document, link target) pairs for every link found in every document.
links = []

for doc in docs:
    with open(doc) as f:
        data = f.read()
    # One entry per regex match, in order of appearance within the document.
    links.extend((doc, target) for target in link_re.findall(data))
| |
def filter_link(doc_link):
    """Return True when a link should be checked against the file system.

    ``doc_link`` is a ``(doc, link)`` pair; only ``link`` is inspected.
    External ``http://`` / ``https://`` URLs and in-page anchors (targets
    starting with ``#``) are skipped and yield False.
    """
    # Unpack in the body: tuple parameters are Python 2-only syntax.
    _doc, link = doc_link
    # Match the full scheme prefix so that local files whose names merely
    # begin with "http" (e.g. ``http_api.md``) are still checked.
    return not link.startswith(('http://', 'https://', '#'))
| |
# Keep only the links that filter_link accepts.
# A list comprehension always yields a real list; filter() is lazy on
# Python 3, which would break the later len(links).
links = [entry for entry in links if filter_link(entry)]
| |
def fix_link(doc_link):
    """Strip the fragment and query string from a link target.

    Takes a ``(doc, link)`` pair and returns ``(doc, path)``, where ``path``
    is ``link`` cut at the first ``#`` and then at the first ``?``.
    """
    # Unpack in the body: tuple parameters are Python 2-only syntax.
    doc, link = doc_link
    path = link.partition('#')[0].partition('?')[0]
    return (doc, path)
| |
# Reduce every link target to its file path via fix_link.
# A list comprehension always yields a real list; map() is lazy on Python 3,
# which would break the later len(links).
links = [fix_link(entry) for entry in links]

# (document, link) pairs whose target could not be found.
errors = []
| |
def check_link(doc_link, root_dir=None):
    """Return True when the link target exists on disk.

    ``doc_link`` is a ``(doc, link)`` pair. A target starting with ``/`` is
    resolved against the root directory; any other target is resolved against
    the root directory joined with the directory part of ``doc``.

    ``root_dir`` is the directory to resolve against; when omitted, the
    module-level ``root`` is used.
    """
    # Unpack in the body: tuple parameters are Python 2-only syntax.
    doc, link = doc_link
    if root_dir is None:
        root_dir = root
    # startswith() rather than link[0]: an empty target (e.g. a link that was
    # only a query string) must not raise IndexError.
    if link.startswith('/'):
        # Drop the leading slash so os.path.join does not discard root_dir.
        full_link = os.path.join(root_dir, link[1:])
    else:
        full_link = os.path.join(root_dir, os.path.dirname(doc), link)
    return os.path.exists(full_link)
| |
# Collect every link whose target does not exist.
errors.extend(entry for entry in links if not check_link(entry))

if not errors:
    # Parenthesized single-argument print behaves the same on Python 2 and 3.
    print('%d links checked: OK' % (len(links),))
    sys.exit(0)

for (doc, link) in errors:
    print('File %s linked from %s not found' % (link, doc))

# Exit status 2 signals broken links (status 1 is the usage error).
sys.exit(2)