chrome/common/extensions/docs/server2/intro_data_source.py - platform/external/chromium_org - Git at Google

 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 from HTMLParser import HTMLParser
 import logging
 import os
 import re

 from docs_server_utils import FormatKey
 from file_system import FileNotFoundError
 from third_party.handlebar import Handlebar

 # TODO(kalman): Rename this to HTMLDataSource (or similar), then have separate
 # intro and article data sources created as instances of it.

 _H1_REGEX = re.compile('<h1[^>.]*?>.*?</h1>', flags=re.DOTALL)

 class _IntroParser(HTMLParser):
   ''' An HTML parser which will parse table of contents and page title info out
   of an intro.
   '''
   def __init__(self):
     HTMLParser.__init__(self)
     self.toc = []
     self.page_title = None
     self._recent_tag = None
     self._current_heading = {}

   def handle_starttag(self, tag, attrs):
     id_ = ''
     if tag not in ['h1', 'h2', 'h3']:
       return
     if tag != 'h1' or self.page_title is None:
       self._recent_tag = tag
     for attr in attrs:
       if attr[0] == 'id':
         id_ = attr[1]
     if tag == 'h2':
       self._current_heading = { 'link': id_, 'subheadings': [], 'title': '' }
       self.toc.append(self._current_heading)
     elif tag == 'h3':
       self._current_heading = { 'link': id_, 'title': '' }
       self.toc[-1]['subheadings'].append(self._current_heading)

   def handle_endtag(self, tag):
     if tag in ['h1', 'h2', 'h3']:
       self._recent_tag = None

   def handle_data(self, data):
     if self._recent_tag is None:
       return
     if self._recent_tag == 'h1':
       if self.page_title is None:
         self.page_title = data
       else:
         self.page_title += data
     elif self._recent_tag in ['h2', 'h3']:
       self._current_heading['title'] += data

 class IntroDataSource(object):
   '''Fetches the intro for a given API and serves its compiled contents.

   Each intro is compiled by Factory._MakeIntroDict into a dict holding the
   intro template, the page title and a table of contents built from the
   headings in the intro.
   '''
   class Factory(object):
     '''Creates IntroDataSource instances that share one compiled cache.'''
     def __init__(self,
                  compiled_fs_factory,
                  file_system,
                  ref_resolver_factory,
                  base_paths):
       self._cache = compiled_fs_factory.Create(file_system,
                                                self._MakeIntroDict,
                                                IntroDataSource)
       self._ref_resolver = ref_resolver_factory.Create()
       self._base_paths = base_paths

     def _MakeIntroDict(self, intro_path, intro):
       '''Compiles the raw |intro| text found at |intro_path|.

       Returns a dict with:
         'intro': a Handlebar template of the intro with links resolved and
             its first <h1> removed,
         'title': the page title parsed from the apps rendering,
         'apps_toc' / 'extensions_toc': the table of contents parsed from
             the intro rendered with is_apps True / False.
       '''
       # Guess the name of the API from the path to the intro.
       api_name = os.path.splitext(intro_path.split('/')[-1])[0]
       intro_with_links = self._ref_resolver.ResolveAllLinks(intro,
                                                             namespace=api_name)
       # TODO(kalman): Do $ref replacement after rendering the template, not
       # before, so that (a) $ref links can contain template annotations, and (b)
       # we can use CompiledFileSystem.ForTemplates to create the templates and
       # save ourselves some effort.
       apps_parser = _IntroParser()
       apps_parser.feed(Handlebar(intro_with_links).render(
           { 'is_apps': True }).text)
       extensions_parser = _IntroParser()
       extensions_parser.feed(Handlebar(intro_with_links).render(
           { 'is_apps': False }).text)
       # TODO(cduvall): Use the normal template rendering system, so we can check
       # errors.
       if extensions_parser.page_title != apps_parser.page_title:
         # The arguments follow the order of the labels in the message: apps
         # title first, extensions title second.
         logging.error(
             'Title differs for apps and extensions: Apps: %s, Extensions: %s.' %
                 (apps_parser.page_title, extensions_parser.page_title))
       # The templates will render the heading themselves, so remove it from the
       # HTML content.
       intro_with_links = re.sub(_H1_REGEX, '', intro_with_links, count=1)
       return {
         'intro': Handlebar(intro_with_links),
         'title': apps_parser.page_title,
         'apps_toc': apps_parser.toc,
         'extensions_toc': extensions_parser.toc,
       }

     def Create(self):
       '''Returns an IntroDataSource backed by this factory's cache.'''
       return IntroDataSource(self._cache, self._base_paths)

   def __init__(self, cache, base_paths):
     self._cache = cache
     self._base_paths = base_paths

   def get(self, key):
     '''Returns the compiled intro dict for |key|.

     The base paths are tried in order and the first one that has the file
     wins. If none has it, the lookup against the first base path is repeated
     so that its FileNotFoundError propagates to the caller.
     '''
     path = FormatKey(key)
     def get_from_base_path(base_path):
       return self._cache.GetFromFile('%s/%s' % (base_path, path)).Get()
     for base_path in self._base_paths:
       try:
         return get_from_base_path(base_path)
       except FileNotFoundError:
         continue
     # Not found. Do the first operation again so that we get a stack trace - we
     # know that it'll fail.
     get_from_base_path(self._base_paths[0])
     raise AssertionError()
	# Copyright (c) 2012 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	from HTMLParser import HTMLParser
	import logging
	import os
	import re

	from docs_server_utils import FormatKey
	from file_system import FileNotFoundError
	from third_party.handlebar import Handlebar

	# TODO(kalman): Rename this to HTMLDataSource (or similar), then have separate
	# intro and article data sources created as instances of it.

	_H1_REGEX = re.compile('<h1[^>.]?>.?</h1>', flags=re.DOTALL)

	class _IntroParser(HTMLParser):
	''' An HTML parser which will parse table of contents and page title info out
	of an intro.
	'''
	def __init__(self):
	HTMLParser.__init__(self)
	self.toc = []
	self.page_title = None
	self._recent_tag = None
	self._current_heading = {}

	def handle_starttag(self, tag, attrs):
	id_ = ''
	if tag not in ['h1', 'h2', 'h3']:
	return
	if tag != 'h1' or self.page_title is None:
	self._recent_tag = tag
	for attr in attrs:
	if attr[0] == 'id':
	id_ = attr[1]
	if tag == 'h2':
	self._current_heading = { 'link': id_, 'subheadings': [], 'title': '' }
	self.toc.append(self._current_heading)
	elif tag == 'h3':
	self._current_heading = { 'link': id_, 'title': '' }
	self.toc[-1]['subheadings'].append(self._current_heading)

	def handle_endtag(self, tag):
	if tag in ['h1', 'h2', 'h3']:
	self._recent_tag = None

	def handle_data(self, data):
	if self._recent_tag is None:
	return
	if self._recent_tag == 'h1':
	if self.page_title is None:
	self.page_title = data
	else:
	self.page_title += data
	elif self._recent_tag in ['h2', 'h3']:
	self._current_heading['title'] += data

	class IntroDataSource(object):
	  '''Fetches the intro for a given API and serves its compiled contents.

	  Each intro is compiled by Factory._MakeIntroDict into a dict holding the
	  intro template, the page title and a table of contents built from the
	  headings in the intro.
	  '''
	  class Factory(object):
	    '''Creates IntroDataSource instances that share one compiled cache.'''
	    def __init__(self,
	                 compiled_fs_factory,
	                 file_system,
	                 ref_resolver_factory,
	                 base_paths):
	      self._cache = compiled_fs_factory.Create(file_system,
	                                               self._MakeIntroDict,
	                                               IntroDataSource)
	      self._ref_resolver = ref_resolver_factory.Create()
	      self._base_paths = base_paths

	    def _MakeIntroDict(self, intro_path, intro):
	      '''Compiles the raw |intro| text found at |intro_path|.

	      Returns a dict with:
	        'intro': a Handlebar template of the intro with links resolved and
	            its first <h1> removed,
	        'title': the page title parsed from the apps rendering,
	        'apps_toc' / 'extensions_toc': the table of contents parsed from
	            the intro rendered with is_apps True / False.
	      '''
	      # Guess the name of the API from the path to the intro.
	      api_name = os.path.splitext(intro_path.split('/')[-1])[0]
	      intro_with_links = self._ref_resolver.ResolveAllLinks(intro,
	                                                            namespace=api_name)
	      # TODO(kalman): Do $ref replacement after rendering the template, not
	      # before, so that (a) $ref links can contain template annotations, and (b)
	      # we can use CompiledFileSystem.ForTemplates to create the templates and
	      # save ourselves some effort.
	      apps_parser = _IntroParser()
	      apps_parser.feed(Handlebar(intro_with_links).render(
	          { 'is_apps': True }).text)
	      extensions_parser = _IntroParser()
	      extensions_parser.feed(Handlebar(intro_with_links).render(
	          { 'is_apps': False }).text)
	      # TODO(cduvall): Use the normal template rendering system, so we can check
	      # errors.
	      if extensions_parser.page_title != apps_parser.page_title:
	        # The arguments follow the order of the labels in the message: apps
	        # title first, extensions title second.
	        logging.error(
	            'Title differs for apps and extensions: Apps: %s, Extensions: %s.' %
	                (apps_parser.page_title, extensions_parser.page_title))
	      # The templates will render the heading themselves, so remove it from the
	      # HTML content.
	      intro_with_links = re.sub(_H1_REGEX, '', intro_with_links, count=1)
	      return {
	        'intro': Handlebar(intro_with_links),
	        'title': apps_parser.page_title,
	        'apps_toc': apps_parser.toc,
	        'extensions_toc': extensions_parser.toc,
	      }

	    def Create(self):
	      '''Returns an IntroDataSource backed by this factory's cache.'''
	      return IntroDataSource(self._cache, self._base_paths)

	  def __init__(self, cache, base_paths):
	    self._cache = cache
	    self._base_paths = base_paths

	  def get(self, key):
	    '''Returns the compiled intro dict for |key|.

	    The base paths are tried in order and the first one that has the file
	    wins. If none has it, the lookup against the first base path is repeated
	    so that its FileNotFoundError propagates to the caller.
	    '''
	    path = FormatKey(key)
	    def get_from_base_path(base_path):
	      return self._cache.GetFromFile('%s/%s' % (base_path, path)).Get()
	    for base_path in self._base_paths:
	      try:
	        return get_from_base_path(base_path)
	      except FileNotFoundError:
	        continue
	    # Not found. Do the first operation again so that we get a stack trace - we
	    # know that it'll fail.
	    get_from_base_path(self._base_paths[0])
	    raise AssertionError()