blob: ee244e881b3c66cb6b00235878be52e260a90d7e [file] [log] [blame]
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from HTMLParser import HTMLParser
import logging
import os
import re
from docs_server_utils import FormatKey
from file_system import FileNotFoundError
from third_party.handlebar import Handlebar
# TODO(kalman): rename this to HTMLDataSource (or similar), then create the
# separate intro and article data sources as instances of it.
# Matches a whole <h1> element: the opening tag with any attributes, the
# heading content (DOTALL lets it span several lines) and the closing tag.
# The attribute part accepts every character except '>', so attribute values
# containing a '.' (e.g. id="chrome.tabs") are matched as well.
_H1_REGEX = re.compile(r'<h1[^>]*?>.*?</h1>', flags=re.DOTALL)
class _IntroParser(HTMLParser):
  ''' An HTML parser which will parse table of contents and page title info out
  of an intro.

  After feeding it a document:
    page_title: the text of the first <h1> that contains text, or None if
        there was none.
    toc: a list with one dict per <h2>, of the form
        { 'link': id, 'title': text, 'subheadings': [...] }, where
        'subheadings' holds one { 'link': id, 'title': text } dict for every
        <h3> that followed that <h2>.
  '''
  # The only tags this parser reacts to; everything else is passed over.
  _HEADING_TAGS = ('h1', 'h2', 'h3')

  def __init__(self):
    # Explicit base-class call (rather than super()) as in the original code.
    HTMLParser.__init__(self)
    self.toc = []
    self.page_title = None
    # The heading tag whose text is currently being collected, if any.
    self._recent_tag = None
    # The TOC entry that heading text is currently appended to.
    self._current_heading = {}

  def handle_starttag(self, tag, attrs):
    '''Starts collecting text for a heading tag and, for <h2>/<h3>, records a
    new table of contents entry. |attrs| is a list of (name, value) pairs.
    '''
    if tag not in self._HEADING_TAGS:
      return
    # Once a title has been captured, a later <h1> must not start a new
    # collection; its text is left to whatever is currently being collected.
    if tag != 'h1' or self.page_title is None:
      self._recent_tag = tag
    if tag == 'h1':
      # The title needs no link, so there is nothing more to do.
      return

    # If "id" is given more than once, the last occurrence wins.
    id_ = ''
    for name, value in attrs:
      if name == 'id':
        id_ = value

    if tag == 'h2':
      self._current_heading = { 'link': id_, 'subheadings': [], 'title': '' }
      self.toc.append(self._current_heading)
      return

    # tag == 'h3'
    self._current_heading = { 'link': id_, 'title': '' }
    if self.toc:
      self.toc[-1]['subheadings'].append(self._current_heading)
    else:
      # There is no <h2> to nest this heading under. Keep collecting its text
      # in a detached entry (so it cannot leak into another heading) but leave
      # it out of the table of contents instead of failing with an IndexError.
      logging.warning(
          'Found <h3> (id "%s") before any <h2>; omitting it from the table '
          'of contents.', id_)

  def handle_endtag(self, tag):
    '''Stops collecting heading text when any heading tag closes.'''
    if tag in self._HEADING_TAGS:
      self._recent_tag = None

  def handle_data(self, data):
    '''Appends |data| to the page title or to the current heading's title,
    depending on which heading tag is open. Text outside headings is ignored.
    '''
    if self._recent_tag is None:
      return
    if self._recent_tag == 'h1':
      # The title may arrive in several chunks, e.g. around nested tags.
      if self.page_title is None:
        self.page_title = data
      else:
        self.page_title += data
    elif self._recent_tag in ('h2', 'h3'):
      self._current_heading['title'] += data
class IntroDataSource(object):
  '''This class fetches the intros for a given API. From this intro, a table
  of contents dictionary is created, which contains the headings in the intro.
  '''

  class Factory(object):
    '''Creates IntroDataSource instances which all share the cache and the
    base paths held by this factory.
    '''

    def __init__(self,
                 compiled_fs_factory,
                 file_system,
                 ref_resolver_factory,
                 base_paths):
      # _MakeIntroDict is handed to the compiled file system as the function
      # that turns a file's content into the cached intro dictionary.
      self._cache = compiled_fs_factory.Create(file_system,
                                               self._MakeIntroDict,
                                               IntroDataSource)
      self._ref_resolver = ref_resolver_factory.Create()
      self._base_paths = base_paths

    def _MakeIntroDict(self, intro_path, intro):
      '''Builds the intro dictionary for the intro at |intro_path| whose
      content is |intro|.

      Returns a dict with the keys:
        'intro': a Handlebar template of the intro with links resolved and the
            first <h1> element removed.
        'title': the page title parsed from the apps rendering.
        'apps_toc': table of contents parsed from the apps rendering.
        'extensions_toc': table of contents parsed from the extensions
            rendering.
      '''
      # Guess the name of the API from the path to the intro.
      api_name = os.path.splitext(intro_path.split('/')[-1])[0]
      intro_with_links = self._ref_resolver.ResolveAllLinks(intro,
                                                            namespace=api_name)
      # TODO(kalman): Do $ref replacement after rendering the template, not
      # before, so that (a) $ref links can contain template annotations, and (b)
      # we can use CompiledFileSystem.ForTemplates to create the templates and
      # save ourselves some effort.
      apps_parser = _IntroParser()
      apps_parser.feed(Handlebar(intro_with_links).render(
          { 'is_apps': True }).text)
      extensions_parser = _IntroParser()
      extensions_parser.feed(Handlebar(intro_with_links).render(
          { 'is_apps': False }).text)
      # TODO(cduvall): Use the normal template rendering system, so we can check
      # errors.
      if extensions_parser.page_title != apps_parser.page_title:
        # The arguments follow the order of the labels in the message: the
        # apps title first, then the extensions title.
        logging.error(
            'Title differs for apps and extensions: Apps: %s, Extensions: %s.',
            apps_parser.page_title,
            extensions_parser.page_title)
      # The templates will render the heading themselves, so remove it from the
      # HTML content.
      intro_with_links = _H1_REGEX.sub('', intro_with_links, count=1)
      return {
        'intro': Handlebar(intro_with_links),
        'title': apps_parser.page_title,
        'apps_toc': apps_parser.toc,
        'extensions_toc': extensions_parser.toc,
      }

    def Create(self):
      '''Returns a new IntroDataSource using this factory's cache and base
      paths.
      '''
      return IntroDataSource(self._cache, self._base_paths)

  def __init__(self, cache, base_paths):
    self._cache = cache
    self._base_paths = base_paths

  def get(self, key):
    '''Returns the cached value for the file named by FormatKey(|key|), taken
    from the first base path under which it is found.

    If the lookup raises FileNotFoundError for every base path, the lookup in
    the first base path is repeated so that its error propagates to the caller.
    '''
    path = FormatKey(key)
    def get_from_base_path(base_path):
      return self._cache.GetFromFile('%s/%s' % (base_path, path)).Get()
    for base_path in self._base_paths:
      try:
        return get_from_base_path(base_path)
      except FileNotFoundError:
        continue
    # Not found. Do the first operation again so that we get a stack trace - we
    # know that it'll fail.
    get_from_base_path(self._base_paths[0])
    # Only reached if the repeated lookup unexpectedly succeeded.
    raise AssertionError()