make_cdd.py - platform/compatibility/cdd - Git at Google

 #!/usr/bin/python
 """
 Utility for building the CDD from component markdown files.

 From the compatibility/cdd directory, run:
 python make_cdd.py --version <version number> --branch <AOSP branch>
     --output <output file name>

 Each generated CDD file is marked with a hash based on the content of the input files.

 TODO(gdimino): Clean up and comment this code.
 """

 from bs4 import BeautifulSoup
 import argparse
 import codecs
 import jinja2
 import markdown
 import os
 import re
 import subprocess
 import tidylib


 # Heading tag names that take part in the table of contents.
 HEADERS_FOR_TOC = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7']
 # Maximum number of table-of-contents entries placed in one column.
 TOC_PER_COL = 34

 def get_section_info(my_path):
   """Collect the rendered markdown of every section directory.

   Scans the immediate subdirectories of my_path. A directory counts as a
   section when its name is not purely alphabetic and it is neither
   'older-versions' nor '.git'. Every '.md' file inside a section is rendered
   to HTML; 'index.md' gets number 0 and any other file is numbered by the
   integer in the second '_'-separated field of its name.

   Args:
     my_path: Directory that contains the section directories.

   Returns:
     A list of section dicts sorted by 'number'. Each has the keys 'id',
     'number', 'title', 'html' and 'children'; 'children' is the list of
     per-file dicts ('file', 'number', 'title', 'html', 'children') sorted by
     'number'.

   Raises:
     ValueError or IndexError when a directory or file name does not contain
     the numeric fields described above.
   """
   section_info = []
   for dir_name in get_immediate_subdirs(my_path):
     if dir_name.isalpha() or dir_name in ('older-versions', '.git'):
       continue
     print('dir = ' + dir_name)
     name_parts = dir_name.split('_')
     title = name_parts[-1]
     # Resolve against my_path so the scan does not depend on the current
     # working directory.
     section_dir = os.path.join(my_path, dir_name)
     child_data = []
     for file_name in os.listdir(section_dir):
       # Match the extension only, so editor backups such as 'x.md~' or
       # '.x.md.swp' are not parsed as sections.
       if not file_name.endswith('.md'):
         continue
       if file_name == 'index.md':
         number = 0
       else:
         number = int(file_name.split('_')[1])
       print('file = ' + file_name + ', dir = ' + dir_name)
       md_path = os.path.join(section_dir, file_name)
       with codecs.open(md_path, 'r', encoding='utf-8') as md_file:
         html_string = markdown.markdown(md_file.read())
       child_data.append({'file': file_name,
                          'number': number,
                          'title': title,
                          'html': html_string,
                          'children': []})
     child_data.sort(key=lambda child: child['number'])
     section_info.append({'id': dir_name,
                          # Every field before the title, digits run together.
                          'number': int(''.join(name_parts[:-1])),
                          'title': title,
                          'html': '',
                          'children': child_data})
   section_info.sort(key=lambda section: section['number'])
   return section_info


 def get_soup(section_info, version):
   """Build the complete standalone HTML document for the sections.

   Args:
     section_info: List of section dicts; each has a 'children' list whose
       entries carry their rendered markup under 'html'.
     version: Android version string placed in the page title.

   Returns:
     A BeautifulSoup object for the whole page: a head (title, stylesheet
     link, charset) and a <div id="main"> holding every child's HTML in order.
   """
   page_start = (u'<!DOCTYPE html>\n'
                 u'<html>\n'
                 u'<head>\n'
                 u'<title>Android %s Compatibility Definition</title>\n'
                 u'<link rel="stylesheet" type="text/css" href="source/android-cdd.css"/>\n'
                 u'<meta charset="utf-8" />\n'
                 u'</head>\n'
                 u'<body>\n'
                 u'<div id="main">') % version
   sections_html = u''.join(child['html']
                            for section in section_info
                            for child in section['children'])
   # Close the document with </html>; a second opening <html> tag here would
   # leave the markup malformed.
   return BeautifulSoup(page_start + sections_html + u'</div></body></html>')

 def get_soup_devsite(section_info):
   """Parse the concatenated section HTML without any page wrapper.

   Args:
     section_info: List of section dicts; each has a 'children' list whose
       entries carry their rendered markup under 'html'.

   Returns:
     A BeautifulSoup object built from every child's HTML, in order.
   """
   fragments = []
   for section in section_info:
     fragments.extend(child['html'] for child in section['children'])
   return BeautifulSoup(''.join(fragments))


 def add_id_to_section_headers(soup):
   """Give every heading in soup an 'id' attribute computed by create_id().

   Args:
     soup: BeautifulSoup document; its heading tags are modified in place.
   """
   # Use the shared module-level tag list rather than a private copy of it.
   for tag in soup.find_all(HEADERS_FOR_TOC):
     tag['id'] = create_id(tag)

 def generate_toc(soup):
   """Build the table-of-contents markup for every heading in soup.

   Headings are split into runs of TOC_PER_COL entries. Runs alternate between
   a 'toc_left' and a 'toc_right' column, and a page-break div follows every
   right-hand column.

   Args:
     soup: BeautifulSoup document whose headings are listed.

   Returns:
     The children of the <body> of the parsed TOC markup; the markup starts
     with the <div id="toc"> element.
   """
   toc_entries = soup.find_all(HEADERS_FOR_TOC)
   toc_chunks = [toc_entries[i:i + TOC_PER_COL]
                 for i in range(0, len(toc_entries), TOC_PER_COL)]
   print('Number of chunks =  %d' % len(toc_chunks))
   parts = ['<div id="toc">']
   # enumerate() yields each chunk's real position; list.index() would return
   # the position of the first *equal* chunk and rescan the list every time.
   for position, chunk in enumerate(toc_chunks):
     is_left = position % 2 == 0
     parts.append('<div id="toc_left">' if is_left else '<div id="toc_right">')
     for tag in chunk:
       parts.append('<p class="toc_' + tag.name + '"><a href= "#' +
                    create_id(tag) + '">' + tag.contents[0] + '</a></p>')
     parts.append('</div>')
     if not is_left:
       parts.append('<div style="clear: both; page-break-after:always; height:1px"></div>')
   parts.append('<div style="clear: both"></div>')
   # Close the outer toc div explicitly instead of relying on parser recovery.
   parts.append('</div>')
   return BeautifulSoup(''.join(parts)).body.contents

 def add_toc(soup):
   """Put a 'Table of Contents' heading and the generated TOC first in body.

   Args:
     soup: BeautifulSoup document; modified in place.

   Returns:
     The same soup object.
   """
   # The TOC is generated before the title tag is inserted, so the title
   # itself is not listed as an entry.
   toc_block = generate_toc(soup)[0]
   title_tag = BeautifulSoup("<h6>Table of Contents</h6>").body.contents[0]
   # Each insert goes to the front, so the title ends up above the TOC.
   for node in (toc_block, title_tag):
     soup.body.insert(0, node)
   return soup

 def create_id(header_tag):
   """Derive an anchor id from the leading content of a heading.

   The text is lower-cased, then '. ', ' ' and '.' are each replaced by '_',
   e.g. '3.1. Managed API' becomes '3_1_managed_api'.

   Args:
     header_tag: Heading element; only the first entry of its .contents is
       used.

   Returns:
     The id string, or '' when the heading has no content.
   """
   contents = header_tag.contents
   if not contents:
     return ''
   first = contents[0]
   # A heading may start with inline markup; such a node is not a string, so
   # use its text instead of calling string methods on the node.
   if not isinstance(first, (str, type(u''))):
     first = first.get_text()
   return first.lower().replace('. ', '_').replace(' ', '_').replace('.', '_')

 def decrease_headings(soup):
   """Demote every heading in soup by one level (h1 -> h2, h2 -> h3, ...).

   Args:
     soup: BeautifulSoup document; its h1..h8 tags are renamed in place.

   Returns:
     The same soup object.
   """
   levels = ['h%d' % n for n in range(1, 9)]
   for node in soup.find_all(levels):
     current = int(re.search(r'(\d)', node.name).group(1))
     node.name = 'h%d' % (current + 1)
   return soup

 def get_version_branch_and_output():
   """Read the version, branch and output name, prompting for missing ones.

   Each value comes from its command-line flag (--version, --branch,
   --output); a flag that is absent or empty is asked for interactively.

   Returns:
     A (version, branch, output) tuple.
   """
   parser = argparse.ArgumentParser()
   for flag, help_text in (('--version', 'Android version'),
                           ('--branch', 'AOSP branch'),
                           ('--output', 'Base name of output file')):
     parser.add_argument(flag, help=help_text)
   args = parser.parse_args()
   # Fall back to a prompt, in the same order as the flags.
   for name, prompt in (('version', 'Android version for CDD: '),
                        ('branch', 'Current AOSP branch for changelog: '),
                        ('output', 'Base name of desired output file: ')):
     if not getattr(args, name):
       setattr(args, name, raw_input(prompt))
   return (args.version, args.branch, args.output)

 # Utilities
 def get_immediate_subdirs(dir):
   """Return the names of the directories located directly inside dir.

   Args:
     dir: Path of the directory to list.

   Returns:
     A list of entry names (not full paths), in os.listdir() order.
   """
   subdirs = []
   for entry in os.listdir(dir):
     if os.path.isdir(os.path.join(dir, entry)):
       subdirs.append(entry)
   return subdirs

 def render_content(page_info, template_filename):
   """Render a Jinja2 template file with the given page values.

   Args:
     page_info: Mapping of template variable names to values.
     template_filename: Path of the template file; read as UTF-8.

   Returns:
     The rendered template text.
   """
   # Decode the template while reading. Calling .encode('utf8') on the bytes
   # returned by a plain open() forces an implicit ASCII decode under Python 2
   # and fails on any non-ASCII template.
   with codecs.open(template_filename, 'r', encoding='utf-8') as fp:
     template_text = fp.read()
   return jinja2.Template(template_text).render(page_info)

 # Odds and ends

 def check_section_numbering(soup):
   """Check that no two headings of one level share a section number.

   The section number of a heading is the leading run of digits and dots in
   its text (e.g. '3.1' for '3.1. Foo'); headings without one are ignored.

   NOTE(review): only duplicates are detected; gaps and out-of-order numbers
   are not. Confirm that this is the intended check.

   Args:
     soup: BeautifulSoup document to inspect.

   Returns:
     True when no heading level contains a repeated section number, False
     otherwise.
   """
   header_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7']
   for tag in header_tags:
     header_numbers = []
     for heading in soup.find_all(tag):
       text = heading.get_text().strip()
       # The pattern can match the empty string, so re.match never fails.
       number = re.match(r"[\d.]*", text).group(0).rstrip('.')
       if number:
         header_numbers.append(number)
     if len(header_numbers) != len(set(header_numbers)):
       return False
   return True

 # Abandoned in favor of tidy.
 def elim_para_whitespace(html):
   """Collapse a paragraph holding one link spread over several lines.

   A <p> whose opening tag, <a> tag, link text, </a> and </p> each sit on
   their own line is rewritten onto a single line followed by a newline.

   Args:
     html: Markup text to process.

   Returns:
     The markup with every such paragraph joined.
   """
   pattern = re.compile(r"(<p[^>]*>)\s*\n\s*(<a[^>]*>)\n([^<]*)\n\s*(</a>)\n\s*(</p>)", re.M)
   return pattern.sub(r"\1\2\3\4\5\n", html)


 def elim_space_before_punc(html):
   """Remove whitespace between a closing </a> and '.', ',', ';' or ':'.

   Args:
     html: Markup text to process.

   Returns:
     The markup with the punctuation pulled up against the link.
   """
   gap_before_punc = re.compile(r"</a>\s+([.,;:])", re.M)
   return gap_before_punc.sub(r"</a>\1", html)


 def main():
   """Build '<output>.html' and '<output>-devsite.html' from the sections.

   Reads the version, branch and output base name, renders every section
   found under the current directory, and writes two files: a standalone page
   with a table of contents, and a devsite page rendered through
   'source/devsite_template.html'. The placeholders ANDROID_VERSION and
   CURRENT_BRANCH are substituted in both, and both are passed through HTML
   Tidy before being written.
   """
   # Read version and branch info and output file name.
   global ANDROID_VERSION
   (ANDROID_VERSION, CURRENT_BRANCH, output_filename) = get_version_branch_and_output()

   # Scan current directory for source files and compile info for the toc.
   section_info = get_section_info(os.getcwd())

   # Get page info
   page_info = {'title': 'Android ANDROID_VERSION Compatibility Definition',
                'book_path': '/_book.yaml',
                'project_path': '/_project.yaml'}

   # Generate the HTML for PDF. Ids are assigned after the TOC is inserted so
   # that the 'Table of Contents' heading receives one too.
   soup = get_soup(section_info, ANDROID_VERSION)
   add_toc(soup)
   add_id_to_section_headers(soup)
   html = soup.prettify(formatter='html')

   # Generate the HTML for devsite
   devsite_soup = get_soup_devsite(section_info)
   add_id_to_section_headers(devsite_soup)
   page_info['body_html'] = decrease_headings(devsite_soup)
   devsite_html = render_content(page_info, 'source/devsite_template.html')

   # Add version and branch info. Plain string replacement keeps backslashes
   # in the user-supplied values from being read as regex escapes.
   html = html.replace('ANDROID_VERSION', ANDROID_VERSION)
   html = html.replace('CURRENT_BRANCH', CURRENT_BRANCH)
   devsite_html = devsite_html.replace('ANDROID_VERSION', ANDROID_VERSION)
   devsite_html = devsite_html.replace('CURRENT_BRANCH', CURRENT_BRANCH)

   # Apply HTML Tidy to output (its diagnostics are not reported).
   (document, _) = tidylib.tidy_document(html, options={'doctype': 'omit'})
   (devsite_document, _) = tidylib.tidy_document(devsite_html, options={'doctype': 'omit'})

   # Eliminate space before punctuation in the text that is actually written.
   document = elim_space_before_punc(document)
   devsite_document = elim_space_before_punc(devsite_document)

   # Write output files; 'with' closes each file, including on error.
   with codecs.open('%s.html' % output_filename, 'w', encoding='utf-8') as output:
     output.write(document)
   with codecs.open('%s-devsite.html' % output_filename, 'w', encoding='utf-8') as devsite_output:
     devsite_output.write(devsite_document)

   # Code to generate PDF
   # TODO(gdimino)

   # subprocess.call('wkhtmltopdf -B 1in -T 1in -L .75in -R .75in page ' +
   #                output_filename +
   #                ' --footer-html source/android-cdd-footer.html  /tmp/android-cdd-body.pdf', shell=True)

 # Run the build only when this file is executed as a script.
 if __name__ == '__main__':
   main()
	#!/usr/bin/python
	"""
	Utility for building the CDD from component markdown files.

	From the compatibility/cdd directory, run:
	python make_cdd.py --version <version number> --branch <AOSP branch>
	--output <output file name>

	Each generated CDD file is marked with a hash based on the content of the input files.

	TODO(gdimino): Clean up and comment this code.
	"""

	from bs4 import BeautifulSoup
	import argparse
	import codecs
	import jinja2
	import markdown
	import os
	import re
	import subprocess
	import tidylib


	# Heading tag names that take part in the table of contents.
	HEADERS_FOR_TOC = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7']
	# Maximum number of table-of-contents entries placed in one column.
	TOC_PER_COL = 34

	def get_section_info(my_path):
	  """Collect the rendered markdown of every section directory.

	  Scans the immediate subdirectories of my_path. A directory counts as a
	  section when its name is not purely alphabetic and it is neither
	  'older-versions' nor '.git'. Every '.md' file inside a section is rendered
	  to HTML; 'index.md' gets number 0 and any other file is numbered by the
	  integer in the second '_'-separated field of its name.

	  Args:
	    my_path: Directory that contains the section directories.

	  Returns:
	    A list of section dicts sorted by 'number'. Each has the keys 'id',
	    'number', 'title', 'html' and 'children'; 'children' is the list of
	    per-file dicts ('file', 'number', 'title', 'html', 'children') sorted by
	    'number'.

	  Raises:
	    ValueError or IndexError when a directory or file name does not contain
	    the numeric fields described above.
	  """
	  section_info = []
	  for dir_name in get_immediate_subdirs(my_path):
	    if dir_name.isalpha() or dir_name in ('older-versions', '.git'):
	      continue
	    print('dir = ' + dir_name)
	    name_parts = dir_name.split('_')
	    title = name_parts[-1]
	    # Resolve against my_path so the scan does not depend on the current
	    # working directory.
	    section_dir = os.path.join(my_path, dir_name)
	    child_data = []
	    for file_name in os.listdir(section_dir):
	      # Match the extension only, so editor backups such as 'x.md~' or
	      # '.x.md.swp' are not parsed as sections.
	      if not file_name.endswith('.md'):
	        continue
	      if file_name == 'index.md':
	        number = 0
	      else:
	        number = int(file_name.split('_')[1])
	      print('file = ' + file_name + ', dir = ' + dir_name)
	      md_path = os.path.join(section_dir, file_name)
	      with codecs.open(md_path, 'r', encoding='utf-8') as md_file:
	        html_string = markdown.markdown(md_file.read())
	      child_data.append({'file': file_name,
	                         'number': number,
	                         'title': title,
	                         'html': html_string,
	                         'children': []})
	    child_data.sort(key=lambda child: child['number'])
	    section_info.append({'id': dir_name,
	                         # Every field before the title, digits run together.
	                         'number': int(''.join(name_parts[:-1])),
	                         'title': title,
	                         'html': '',
	                         'children': child_data})
	  section_info.sort(key=lambda section: section['number'])
	  return section_info


	def get_soup(section_info, version):
	  """Build the complete standalone HTML document for the sections.

	  Args:
	    section_info: List of section dicts; each has a 'children' list whose
	      entries carry their rendered markup under 'html'.
	    version: Android version string placed in the page title.

	  Returns:
	    A BeautifulSoup object for the whole page: a head (title, stylesheet
	    link, charset) and a <div id="main"> holding every child's HTML in order.
	  """
	  page_start = (u'<!DOCTYPE html>\n'
	                u'<html>\n'
	                u'<head>\n'
	                u'<title>Android %s Compatibility Definition</title>\n'
	                u'<link rel="stylesheet" type="text/css" href="source/android-cdd.css"/>\n'
	                u'<meta charset="utf-8" />\n'
	                u'</head>\n'
	                u'<body>\n'
	                u'<div id="main">') % version
	  sections_html = u''.join(child['html']
	                           for section in section_info
	                           for child in section['children'])
	  # Close the document with </html>; a second opening <html> tag here would
	  # leave the markup malformed.
	  return BeautifulSoup(page_start + sections_html + u'</div></body></html>')

	def get_soup_devsite(section_info):
	  """Parse the concatenated section HTML without any page wrapper.

	  Args:
	    section_info: List of section dicts; each has a 'children' list whose
	      entries carry their rendered markup under 'html'.

	  Returns:
	    A BeautifulSoup object built from every child's HTML, in order.
	  """
	  fragments = []
	  for section in section_info:
	    fragments.extend(child['html'] for child in section['children'])
	  return BeautifulSoup(''.join(fragments))


	def add_id_to_section_headers(soup):
	  """Give every heading in soup an 'id' attribute computed by create_id().

	  Args:
	    soup: BeautifulSoup document; its heading tags are modified in place.
	  """
	  # Use the shared module-level tag list rather than a private copy of it.
	  for tag in soup.find_all(HEADERS_FOR_TOC):
	    tag['id'] = create_id(tag)

	def generate_toc(soup):
	  """Build the table-of-contents markup for every heading in soup.

	  Headings are split into runs of TOC_PER_COL entries. Runs alternate between
	  a 'toc_left' and a 'toc_right' column, and a page-break div follows every
	  right-hand column.

	  Args:
	    soup: BeautifulSoup document whose headings are listed.

	  Returns:
	    The children of the <body> of the parsed TOC markup; the markup starts
	    with the <div id="toc"> element.
	  """
	  toc_entries = soup.find_all(HEADERS_FOR_TOC)
	  toc_chunks = [toc_entries[i:i + TOC_PER_COL]
	                for i in range(0, len(toc_entries), TOC_PER_COL)]
	  print('Number of chunks = %d' % len(toc_chunks))
	  parts = ['<div id="toc">']
	  # enumerate() yields each chunk's real position; list.index() would return
	  # the position of the first *equal* chunk and rescan the list every time.
	  for position, chunk in enumerate(toc_chunks):
	    is_left = position % 2 == 0
	    parts.append('<div id="toc_left">' if is_left else '<div id="toc_right">')
	    for tag in chunk:
	      parts.append('<p class="toc_' + tag.name + '"><a href= "#' +
	                   create_id(tag) + '">' + tag.contents[0] + '</a></p>')
	    parts.append('</div>')
	    if not is_left:
	      parts.append('<div style="clear: both; page-break-after:always; height:1px"></div>')
	  parts.append('<div style="clear: both"></div>')
	  # Close the outer toc div explicitly instead of relying on parser recovery.
	  parts.append('</div>')
	  return BeautifulSoup(''.join(parts)).body.contents

	def add_toc(soup):
	  """Put a 'Table of Contents' heading and the generated TOC first in body.

	  Args:
	    soup: BeautifulSoup document; modified in place.

	  Returns:
	    The same soup object.
	  """
	  # The TOC is generated before the title tag is inserted, so the title
	  # itself is not listed as an entry.
	  toc_block = generate_toc(soup)[0]
	  title_tag = BeautifulSoup("<h6>Table of Contents</h6>").body.contents[0]
	  # Each insert goes to the front, so the title ends up above the TOC.
	  for node in (toc_block, title_tag):
	    soup.body.insert(0, node)
	  return soup

	def create_id(header_tag):
	  """Derive an anchor id from the leading content of a heading.

	  The text is lower-cased, then '. ', ' ' and '.' are each replaced by '_',
	  e.g. '3.1. Managed API' becomes '3_1_managed_api'.

	  Args:
	    header_tag: Heading element; only the first entry of its .contents is
	      used.

	  Returns:
	    The id string, or '' when the heading has no content.
	  """
	  contents = header_tag.contents
	  if not contents:
	    return ''
	  first = contents[0]
	  # A heading may start with inline markup; such a node is not a string, so
	  # use its text instead of calling string methods on the node.
	  if not isinstance(first, (str, type(u''))):
	    first = first.get_text()
	  return first.lower().replace('. ', '_').replace(' ', '_').replace('.', '_')

	def decrease_headings(soup):
	  """Demote every heading in soup by one level (h1 -> h2, h2 -> h3, ...).

	  Args:
	    soup: BeautifulSoup document; its h1..h8 tags are renamed in place.

	  Returns:
	    The same soup object.
	  """
	  levels = ['h%d' % n for n in range(1, 9)]
	  for node in soup.find_all(levels):
	    current = int(re.search(r'(\d)', node.name).group(1))
	    node.name = 'h%d' % (current + 1)
	  return soup

	def get_version_branch_and_output():
	  """Read the version, branch and output name, prompting for missing ones.

	  Each value comes from its command-line flag (--version, --branch,
	  --output); a flag that is absent or empty is asked for interactively.

	  Returns:
	    A (version, branch, output) tuple.
	  """
	  parser = argparse.ArgumentParser()
	  parser.add_argument('--version', help='Android version')
	  parser.add_argument('--branch', help='AOSP branch')
	  parser.add_argument('--output', help='Base name of output file')
	  args = parser.parse_args()
	  # Fall back to a prompt, in the same order as the flags.
	  for name, prompt in (('version', 'Android version for CDD: '),
	                       ('branch', 'Current AOSP branch for changelog: '),
	                       ('output', 'Base name of desired output file: ')):
	    if not getattr(args, name):
	      setattr(args, name, raw_input(prompt))
	  return (args.version, args.branch, args.output)

	# Utilities
	def get_immediate_subdirs(dir):
	  """Return the names of the directories located directly inside dir.

	  Args:
	    dir: Path of the directory to list.

	  Returns:
	    A list of entry names (not full paths), in os.listdir() order.
	  """
	  return [name for name in os.listdir(dir)
	          if os.path.isdir(os.path.join(dir, name))]

	def render_content(page_info, template_filename):
	  """Render a Jinja2 template file with the given page values.

	  Args:
	    page_info: Mapping of template variable names to values.
	    template_filename: Path of the template file; read as UTF-8.

	  Returns:
	    The rendered template text.
	  """
	  # Decode the template while reading. Calling .encode('utf8') on the bytes
	  # returned by a plain open() forces an implicit ASCII decode under Python 2
	  # and fails on any non-ASCII template.
	  with codecs.open(template_filename, 'r', encoding='utf-8') as fp:
	    template_text = fp.read()
	  return jinja2.Template(template_text).render(page_info)

	# Odds and ends

	def check_section_numbering(soup):
	  """Check that no two headings of one level share a section number.

	  The section number of a heading is the leading run of digits and dots in
	  its text (e.g. '3.1' for '3.1. Foo'); headings without one are ignored.

	  NOTE(review): only duplicates are detected; gaps and out-of-order numbers
	  are not. Confirm that this is the intended check.

	  Args:
	    soup: BeautifulSoup document to inspect.

	  Returns:
	    True when no heading level contains a repeated section number, False
	    otherwise.
	  """
	  header_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7']
	  for tag in header_tags:
	    header_numbers = []
	    for heading in soup.find_all(tag):
	      text = heading.get_text().strip()
	      # The pattern can match the empty string, so re.match never fails.
	      number = re.match(r"[\d.]*", text).group(0).rstrip('.')
	      if number:
	        header_numbers.append(number)
	    if len(header_numbers) != len(set(header_numbers)):
	      return False
	  return True

	# Abandoned in favor of tidy.
	def elim_para_whitespace(html):
	  """Collapse a paragraph holding one link spread over several lines.

	  A <p> whose opening tag, <a> tag, link text, </a> and </p> each sit on
	  their own line is rewritten onto a single line followed by a newline.

	  Args:
	    html: Markup text to process.

	  Returns:
	    The markup with every such paragraph joined.
	  """
	  # The '*' quantifiers are required: without them each class matches exactly
	  # one character, so tags with attributes and multi-character link text are
	  # never matched.
	  pattern = re.compile(r"(<p[^>]*>)\s*\n\s*(<a[^>]*>)\n([^<]*)\n\s*(</a>)\n\s*(</p>)", re.M)
	  return pattern.sub(r"\1\2\3\4\5\n", html)


	def elim_space_before_punc(html):
	  """Remove whitespace between a closing </a> and '.', ',', ';' or ':'.

	  Args:
	    html: Markup text to process.

	  Returns:
	    The markup with the punctuation pulled up against the link.
	  """
	  gap_before_punc = re.compile(r"</a>\s+([.,;:])", re.M)
	  return gap_before_punc.sub(r"</a>\1", html)


	def main():
	  """Build '<output>.html' and '<output>-devsite.html' from the sections.

	  Reads the version, branch and output base name, renders every section
	  found under the current directory, and writes two files: a standalone page
	  with a table of contents, and a devsite page rendered through
	  'source/devsite_template.html'. The placeholders ANDROID_VERSION and
	  CURRENT_BRANCH are substituted in both, and both are passed through HTML
	  Tidy before being written.
	  """
	  # Read version and branch info and output file name.
	  global ANDROID_VERSION
	  (ANDROID_VERSION, CURRENT_BRANCH, output_filename) = get_version_branch_and_output()

	  # Scan current directory for source files and compile info for the toc.
	  section_info = get_section_info(os.getcwd())

	  # Get page info
	  page_info = {'title': 'Android ANDROID_VERSION Compatibility Definition',
	               'book_path': '/_book.yaml',
	               'project_path': '/_project.yaml'}

	  # Generate the HTML for PDF. Ids are assigned after the TOC is inserted so
	  # that the 'Table of Contents' heading receives one too.
	  soup = get_soup(section_info, ANDROID_VERSION)
	  add_toc(soup)
	  add_id_to_section_headers(soup)
	  html = soup.prettify(formatter='html')

	  # Generate the HTML for devsite
	  devsite_soup = get_soup_devsite(section_info)
	  add_id_to_section_headers(devsite_soup)
	  page_info['body_html'] = decrease_headings(devsite_soup)
	  devsite_html = render_content(page_info, 'source/devsite_template.html')

	  # Add version and branch info. Plain string replacement keeps backslashes
	  # in the user-supplied values from being read as regex escapes.
	  html = html.replace('ANDROID_VERSION', ANDROID_VERSION)
	  html = html.replace('CURRENT_BRANCH', CURRENT_BRANCH)
	  devsite_html = devsite_html.replace('ANDROID_VERSION', ANDROID_VERSION)
	  devsite_html = devsite_html.replace('CURRENT_BRANCH', CURRENT_BRANCH)

	  # Apply HTML Tidy to output (its diagnostics are not reported).
	  (document, _) = tidylib.tidy_document(html, options={'doctype': 'omit'})
	  (devsite_document, _) = tidylib.tidy_document(devsite_html, options={'doctype': 'omit'})

	  # Eliminate space before punctuation in the text that is actually written.
	  document = elim_space_before_punc(document)
	  devsite_document = elim_space_before_punc(devsite_document)

	  # Write output files; 'with' closes each file, including on error.
	  with codecs.open('%s.html' % output_filename, 'w', encoding='utf-8') as output:
	    output.write(document)
	  with codecs.open('%s-devsite.html' % output_filename, 'w', encoding='utf-8') as devsite_output:
	    devsite_output.write(devsite_document)

	# Code to generate PDF
	# TODO(gdimino)

	# subprocess.call('wkhtmltopdf -B 1in -T 1in -L .75in -R .75in page ' +
	# output_filename +
	# ' --footer-html source/android-cdd-footer.html /tmp/android-cdd-body.pdf', shell=True)

	# Run the build only when this file is executed as a script.
	if __name__ == '__main__':
	  main()