scripts/app_engine_server/memcache_zipserve.py - platform/development - Git at Google

 #!/usr/bin/env python
 #
 # Copyright 2009 Google Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #

 """A class to serve pages from zip files and use memcache for performance.

 This contains a class and a function to create an anonymous instance of the
 class to serve HTTP GET requests. Memcache is used to increase response speed
 and lower processing cycles used in serving. Credit to Guido van Rossum and
 his implementation of zipserve which served as a reference as I wrote this.

   MemcachedZipHandler: Class that serves request
   create_handler: method to create instance of MemcachedZipHandler
 """

 __author__ = 'jmatt@google.com (Justin Mattson)'

 import email.Utils
 import logging
 import mimetypes
 import time
 import zipfile

 from google.appengine.api import memcache
 from google.appengine.ext import webapp
 from google.appengine.ext.webapp import util
 from time import localtime, strftime

 def create_handler(zip_files, max_age=None, public=None):
   """Factory method to create a MemcachedZipHandler subclass bound to archives.

   Args:
     zip_files: A non-empty list. Each member is either an archive file name
         or a list whose first member is the archive file name; a bare name
         is wrapped into a one-member list. See MapFileToArchive for how a
         second list member is used.
     max_age: Value for the Cache-Control max-age directive. None keeps the
         MemcachedZipHandler default.
     public: Whether responses are declared public in the client-side cache.
         None keeps the MemcachedZipHandler default.

   Returns:
     A MemcachedZipHandler subclass (a class, not an instance) whose get()
     serves from the given archives.

   Raises:
     ValueError: if the zip_files argument is empty or is not a list
   """
   if not zip_files or not isinstance(zip_files, list):
     raise ValueError('File name arguments must be a list')

   # Normalize to list-of-lists form on a copy so that the caller's list is
   # left untouched.
   archives = []
   for entry in zip_files:
     if isinstance(entry, list):
       archives.append(entry)
     else:
       archives.append([entry])

   class HandlerWrapper(MemcachedZipHandler):
     """MemcachedZipHandler configured with the factory's arguments.

     The configuration is carried by closure: main() in this file hands a
     handler class (not an instance) to webapp.WSGIApplication, so there is no
     constructor call of our own through which to pass it.
     """

     def get(self, name):
       # The overrides must be in place before TrueGet runs, because
       # SetCachingHeaders reads self.MAX_AGE and self.PUBLIC while the
       # response is being built.
       self.zipfilenames = archives
       if max_age is not None:
         self.MAX_AGE = max_age
       if public is not None:
         self.PUBLIC = public
       self.TrueGet(name)

   return HandlerWrapper


 class MemcachedZipHandler(webapp.RequestHandler):
   """Handles get requests for a given URL.

   Serves a GET request from a series of zip files. As files are served they are
   put into memcache, which is much faster than retrieving them from the zip
   source file again. It also uses considerably fewer CPU cycles.
   """
   zipfile_cache = {}                # class cache of source zip files
   MAX_AGE = 600                     # max client-side cache lifetime
   PUBLIC = True                     # public cache setting
   CACHE_PREFIX = 'cache://'         # memcache key prefix for actual URLs
   NEG_CACHE_PREFIX = 'noncache://'  # memcache key prefix for non-existant URL
   intlString = 'intl/'
   validLangs = ['en', 'de', 'es', 'fr','it','ja','zh-CN','zh-TW']

   def TrueGet(self, reqUri):
     """The top-level entry point to serving requests.

     Called 'True' get because it does the work when called from the wrapper
     class' get method. Some logic is applied to the request to serve files
     from an intl/<lang>/... directory or fall through to the default language.

     Args:
       reqUri: URL requested

     Returns:
       None
     """
     langName = 'en'
     resetLangCookie = False
     isValidIntl = False
     isStripped = False

     # Try to retrieve the user's lang pref from the cookie. If there is no
     # lang pref cookie in the request, add set-cookie to the response with the
     # default value of 'en'.
     try:
       langName = self.request.cookies['android_developer_pref_lang']
     except KeyError:
       resetLangCookie = True
     if langName not in self.validLangs:
       # The cookie is client-supplied. An unknown code would otherwise be
       # used to build an /intl/<code>/ redirect that can only end in a 404,
       # so treat it like a missing cookie.
       langName = 'en'
       resetLangCookie = True
     logging.info('==========================REQ INIT name [%s] langName [%s] resetLangCookie [%s]', reqUri, langName, resetLangCookie)

     # Preprocess the req url. If it references a directory or the domain
     # itself, PreprocessUrl has already sent a redirect to the matching
     # index.html and there is nothing more to do here.
     name = self.PreprocessUrl(reqUri, langName)
     if not name:
       return

     # Do some prep for handling intl requests. Parse the url and validate
     # the intl/lang substring, extract the url lang code (urlLangName) and
     # the uri that follows the intl/lang substring (contentUri).
     sections = name.split('/', 2)
     contentUri = None
     isIntl = len(sections) > 1 and sections[0] == 'intl'
     if isIntl:
       isValidIntl = len(sections) > 2 and sections[1] in self.validLangs
       if isValidIntl:
         urlLangName = sections[1]
         contentUri = sections[2]
         logging.info('  Content URI is [%s]...', contentUri)
         if urlLangName != langName or langName == 'en':
           # If the lang code in the request is different from that in
           # the cookie, or if the target lang is en, strip the
           # intl/nn substring. It will later be redirected to
           # the user's preferred language url.
           name = contentUri
           isStripped = True
           isValidIntl = False
           isIntl = False

     # Send for processing
     if self.isCleanUrl(name, langName, isValidIntl, isStripped):
       # Handle a 'clean' request: try to form a response using the actual
       # request url.
       if not self.CreateResponse(name, langName, isValidIntl, resetLangCookie):
         # CreateResponse returned False, so there was no such document in
         # the intl/lang tree. Before going to 404, see if there is an
         # English-language version of the doc in the default tree and
         # return it, else go to 404.
         self.CreateResponse(contentUri, langName, False, resetLangCookie)

     elif isIntl:
       # Handle the case where we need to pass through an invalid intl req
       # for processing (so as to get 404 as appropriate). This is needed
       # because intl urls are passed through clean and retried in English,
       # if necessary.
       self.CreateResponse(name, langName, isValidIntl, resetLangCookie)

     else:
       # Handle the case where we have a non-clean url (usually a non-intl
       # url) that we need to interpret in the context of any lang pref
       # that is set. Prepend an intl/lang string to the request url and
       # send it as a 302 redirect. After the redirect, the subsequent
       # request will be handled as a clean url.
       self.RedirToIntl(name, self.intlString, langName)

   def isCleanUrl(self, name, langName, isValidIntl, isStripped):
     """Determine whether to pass an incoming url straight to processing.

     Args:
       name: The incoming URL
       langName: The language code in effect for the request
       isValidIntl: Whether the request is for a valid language-specific url
       isStripped: Whether an intl/<lang>/ prefix was stripped from the url

     Returns:
       True if the URL should be sent straight to processing, False otherwise
       (always a real boolean, never None).
     """
     # English requests that arrived without an intl prefix are served as-is.
     if langName == 'en' and not isStripped:
       return True
     # A valid intl/<lang>/ url is served from the localized tree.
     if isValidIntl:
       return True
     # Only .html documents are negotiated by language.
     if '.html' not in name:
       return True
     # With no language at all there is nothing to redirect to.
     return not langName

   def PreprocessUrl(self, name, langName):
     """Normalize the incoming URL, redirecting directory requests.

     A URL whose last path segment contains no '.' is taken to name a
     directory. Directory requests, and the empty URL, are redirected to the
     index.html inside them. Subclasses may override this method to do
     different preprocessing.

     Args:
       name: The incoming URL
       langName: The language code for the request (not used here)

     Returns:
       False if the request was redirected to '/index.html', or
       The processed URL, otherwise
     """
     # A dot-less final segment means a directory: give it its trailing slash
     if name and not name.endswith('/'):
       last_segment = name.rsplit('/', 1)[-1]
       if '.' not in last_segment:
         name = name + '/'

     # anything that still names a file is handed back unchanged
     if name and not name.endswith('/'):
       return name

     # a directory or the domain itself: redirect to its index.html
     self.redirect('/' + name + 'index.html', False)
     return False

   def RedirToIntl(self, name, intlString, langName):
     """Redirect an incoming request to the appropriate intl uri.

     For non-en langName, builds the intl/lang string from a
     base (en) string and redirects (302) the request to look for
     a version of the file in langName. For en langName, simply
     redirects a stripped uri string (intl/nn removed). The request's
     query string is carried over when there is one.

     Args:
       name: The incoming, preprocessed URL
       intlString: The path prefix of the localized trees (e.g. 'intl/')
       langName: The language code to redirect to

     Returns:
       The lang-specific URL
     """
     if langName == 'en':
       uri = '/' + name
     else:
       uri = ''.join(['/', intlString, langName, '/', name])

     # Append the query only when present, so that no bare '?' is emitted and
     # the 'en' redirect does not lose it.
     query = self.request.query_string
     if query:
       uri = uri + '?' + query

     logging.info('-->>REDIRECTING %s to  %s', name, uri)
     self.redirect(uri, False)
     return uri

   def CreateResponse(self, name, langName, isValidIntl, resetLangCookie):
     """Process the url and form a response, if appropriate.

     Attempts to retrieve the requested file (name) from cache,
     negative cache, or store (zip) and form the response.
     For intl requests that are not found (in the localized tree),
     returns False rather than forming a response, so that
     the request can be retried with the base url (this is the
     fallthrough to default language).

     For requests that are found, forms the headers and adds the content to
     the response entity. When resetLangCookie is set, the language cookie is
     also (re)set to langName.

     Args:
       name: The incoming, preprocessed URL
       langName: The language id. Used as necessary to reset the
                 language cookie in the response.
       isValidIntl: Whether the request is for a language-specific url
       resetLangCookie: Whether the response should reset the
                        language cookie to 'langName'

     Returns:
       True: A response (the content or a 404) was written for the request
       False: No response was created.
     """
     logging.info('PROCESSING %s langName [%s] isValidIntl [%s] resetLang [%s]',
                  name, langName, isValidIntl, resetLangCookie)

     # see if we have the page in the memcache
     resp_data = self.GetFromCache(name)
     if resp_data is None:
       logging.info('  Cache miss for %s', name)
       if self.GetFromNegativeCache(name) is not None:
         # found it in negative cache
         self.Write404Error()
         return True

       resp_data = self.GetFromStore(name)
       if resp_data is None:
         if isValidIntl:
           # couldn't find the intl doc. Let the caller fall through to
           # English; nothing is written or negatively cached here.
           return False
         logging.info('  Adding %s to negative cache, serving 404', name)
         self.StoreInNegativeCache(name)
         self.Write404Error()
         return True
       self.StoreOrUpdateInCache(name, resp_data)

     # found content from cache or store
     logging.info('FOUND CLEAN')
     if resetLangCookie:
       logging.info('  Resetting android_developer_pref_lang cookie to [%s]',
                    langName)
       # Ten years from now. Cookie expiry dates are read as GMT, so format
       # the time in GMT and say so explicitly.
       expireDate = time.time() + 60 * 60 * 24 * 365 * 10
       self.response.headers.add_header(
           'Set-Cookie',
           'android_developer_pref_lang=%s; path=/; expires=%s' %
           (langName,
            time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                          time.gmtime(expireDate))))

     # revalidate html files -- workaround for cache inconsistencies for
     # negotiated responses
     mustRevalidate = '.html' in name
     if mustRevalidate:
       self.response.headers.add_header('Vary', 'Cookie')

     content_type = mimetypes.guess_type(name)[0]
     if not content_type:
       # Types mimetypes may not know. Anything still unrecognized is sent as
       # a generic binary download instead of an empty 200 response.
       content_type = 'application/octet-stream'
       if name == 'favicon.ico':
         content_type = 'image/x-icon'
       else:
         for suffix, fallback_type in (('.psd', 'application/octet-stream'),
                                       ('.svg', 'image/svg+xml'),
                                       ('.mp4', 'video/mp4'),
                                       ('.webm', 'video/webm')):
           if name.endswith(suffix):
             content_type = fallback_type
             break

     self.response.headers['Content-Type'] = content_type
     self.SetCachingHeaders(mustRevalidate)
     self.response.out.write(resp_data)
     return True

   def GetFromStore(self, file_path):
     """Retrieve file from zip files.

     Get the file from the source, it must not have been in the memcache. If
     possible, we'll use the zip file index to quickly locate where the file
     should be found. (See MapToFileArchive documentation for assumptions about
     file ordering.) If we don't have an index or don't find the file where the
     index says we should, look through all the zip files to find it.

     Args:
       file_path: the file that we're looking for

     Returns:
       The contents of the requested file
     """
     resp_data = None
     file_itr = iter(self.zipfilenames)

     # check the index, if we have one, to see what archive the file is in
     archive_name = self.MapFileToArchive(file_path)
     if not archive_name:
       archive_name = file_itr.next()[0]

     while resp_data is None and archive_name:
       zip_archive = self.LoadZipFile(archive_name)
       if zip_archive:

         # we expect some lookups will fail, and that's okay, 404s will deal
         # with that
         try:
           resp_data = zip_archive.read(file_path)
         except (KeyError, RuntimeError), err:
           # no op
           x = False
         if resp_data is not None:
           logging.info('%s read from %s', file_path, archive_name)

       try:
         archive_name = file_itr.next()[0]
       except (StopIteration), err:
         archive_name = False

     return resp_data

   def LoadZipFile(self, zipfilename):
     """Convenience method to load zip file.

     Just a convenience method to load the zip file from the data store. This is
     useful if we ever want to change data stores and also as a means of
     dependency injection for testing. This method will look at our file cache
     first, and then load and cache the file if there's a cache miss

     Args:
       zipfilename: the name of the zip file to load

     Returns:
       The zip file requested, or None if there is an I/O error
     """
     zip_archive = None
     zip_archive = self.zipfile_cache.get(zipfilename)
     if zip_archive is None:
       try:
         zip_archive = zipfile.ZipFile(zipfilename)
         self.zipfile_cache[zipfilename] = zip_archive
       except (IOError, RuntimeError), err:
         logging.error('Can\'t open zipfile %s, cause: %s' % (zipfilename,
                                                              err))
     return zip_archive

   def MapFileToArchive(self, file_path):
     """Given a file name, determine what archive it should be in.

     This method makes two critical assumptions.
     (1) The zip files passed as an argument to the handler, if concatenated
         in that same order, would result in a total ordering
         of all the files. See (2) for ordering type.
     (2) Upper case letters before lower case letters. The traversal of a
         directory tree is depth first. A parent directory's files are added
         before the files of any child directories

     Args:
       file_path: the file to be mapped to an archive

     Returns:
       The name of the archive where we expect the file to be
     """
     num_archives = len(self.zipfilenames)
     while num_archives > 0:
       target = self.zipfilenames[num_archives - 1]
       if len(target) > 1:
         if self.CompareFilenames(target[1], file_path) >= 0:
           return target[0]
       num_archives -= 1

     return None

   def CompareFilenames(self, file1, file2):
     """Determines whether file1 is lexicographically 'before' file2.

     WARNING: This method assumes that paths are output depth-first, with
     parent directories' files stored before their children's.

     The paths are compared at their first differing '/'-separated segment.
     When the paths have different depths and that segment is the last one of
     either path (or one path is a prefix of the other), the path with fewer
     segments is first, because files come before directories.

     Args:
       file1: the first file path
       file2: the second file path

     Returns:
       A positive number if file1 is before file2
       A negative number if file2 is before file1
       0 if filenames are the same
     """
     f1_segments = file1.split('/')
     f2_segments = file2.split('/')
     shortest = min(len(f1_segments), len(f2_segments))

     # index of the first segment at which the two paths differ
     segment_ptr = 0
     while (segment_ptr < shortest and
            f1_segments[segment_ptr] == f2_segments[segment_ptr]):
       segment_ptr += 1

     if len(f1_segments) != len(f2_segments):
       # '>=' rather than '==': this also covers a path that is a prefix of
       # the other by two or more segments, where the shorter path has no
       # segment left to compare.
       if segment_ptr + 1 >= shortest:
         return len(f2_segments) - len(f1_segments)
     elif segment_ptr == shortest:
       # we fell off the end, the paths must be the same
       return 0

     # compare the segments where the paths differ
     if f1_segments[segment_ptr] < f2_segments[segment_ptr]:
       return 1
     elif f1_segments[segment_ptr] > f2_segments[segment_ptr]:
       return -1
     return 0

   def SetCachingHeaders(self, revalidate):
     """Set caching headers for the request."""
     max_age = self.MAX_AGE
     #self.response.headers['Expires'] = email.Utils.formatdate(
     #    time.time() + max_age, usegmt=True)
     cache_control = []
     if self.PUBLIC:
       cache_control.append('public')
     cache_control.append('max-age=%d' % max_age)
     if revalidate:
       cache_control.append('must-revalidate')
     self.response.headers['Cache-Control'] = ', '.join(cache_control)

   def GetFromCache(self, filename):
     """Look a file up in memcache.

     Args:
       filename: The URL of the file to return

     Returns:
       Whatever memcache holds under CACHE_PREFIX + filename
     """
     cache_key = '%s%s' % (self.CACHE_PREFIX, filename)
     return memcache.get(cache_key)

   def StoreOrUpdateInCache(self, filename, data):
     """Store data in the cache.

     Store a piece of data in the memcache. Memcache has a maximum item size of
     1*10^6 bytes. If the data is too large, fail, but log the failure. Future
     work will consider compressing the data before storing or chunking it

     Args:
       filename: the name of the file to store
       data: the data of the file

     Returns:
       None
     """
     cache_key = '%s%s' % (self.CACHE_PREFIX, filename)
     try:
       # set() writes the value whether or not the key already exists, which
       # is what the former add-then-replace pair did in two calls.
       memcache.set(cache_key, data)
     except ValueError as err:
       logging.warning('Data size too large to cache\n%s', err)

   def Write404Error(self):
     """Set the 404 status and write a small HTML error page."""
     self.error(404)
     page = ('<html><head><title>404: Not Found</title></head>'
             '<body><b><h2>Error 404</h2><br/>'
             'File not found</b></body></html>')
     self.response.out.write(page)

   def StoreInNegativeCache(self, filename):
     """Remember that a URL does not exist.

     A -1 marker is added to memcache under NEG_CACHE_PREFIX + filename.
     Future work should consider setting a maximum negative cache size to
     prevent it from negatively impacting the real cache.

     Args:
       filename: URL to add to negative cache

     Returns:
       None
     """
     negative_key = '%s%s' % (self.NEG_CACHE_PREFIX, filename)
     memcache.add(negative_key, -1)

   def GetFromNegativeCache(self, filename):
     """Look a URL up in the negative cache.

     Args:
       filename: URL to retrieve

     Returns:
       Whatever memcache holds under NEG_CACHE_PREFIX + filename; this is the
       marker written by StoreInNegativeCache, not file contents.
     """
     negative_key = '%s%s' % (self.NEG_CACHE_PREFIX, filename)
     return memcache.get(negative_key)

 def main():
   """Run a WSGI application that routes to MemcachedZipHandler."""
   routes = [('/([^/]+)/(.*)', MemcachedZipHandler)]
   util.run_wsgi_app(webapp.WSGIApplication(routes))


 if __name__ == '__main__':
   main()
	#!/usr/bin/env python
	#
	# Copyright 2009 Google Inc.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#

	"""A class to serve pages from zip files and use memcache for performance.

	This contains a class and a function to create an anonymous instance of the
	class to serve HTTP GET requests. Memcache is used to increase response speed
	and lower processing cycles used in serving. Credit to Guido van Rossum and
	his implementation of zipserve which served as a reference as I wrote this.

	MemcachedZipHandler: Class that serves request
	create_handler: method to create instance of MemcachedZipHandler
	"""

	__author__ = 'jmatt@google.com (Justin Mattson)'

	import email.Utils
	import logging
	import mimetypes
	import time
	import zipfile

	from google.appengine.api import memcache
	from google.appengine.ext import webapp
	from google.appengine.ext.webapp import util
	from time import localtime, strftime

	def create_handler(zip_files, max_age=None, public=None):
	  """Factory method to create a MemcachedZipHandler subclass bound to archives.

	  Args:
	    zip_files: A non-empty list. Each member is either an archive file name
	        or a list whose first member is the archive file name; a bare name
	        is wrapped into a one-member list. See MapFileToArchive for how a
	        second list member is used.
	    max_age: Value for the Cache-Control max-age directive. None keeps the
	        MemcachedZipHandler default.
	    public: Whether responses are declared public in the client-side cache.
	        None keeps the MemcachedZipHandler default.

	  Returns:
	    A MemcachedZipHandler subclass (a class, not an instance) whose get()
	    serves from the given archives.

	  Raises:
	    ValueError: if the zip_files argument is empty or is not a list
	  """
	  if not zip_files or not isinstance(zip_files, list):
	    raise ValueError('File name arguments must be a list')

	  # Normalize to list-of-lists form on a copy so that the caller's list is
	  # left untouched.
	  archives = []
	  for entry in zip_files:
	    if isinstance(entry, list):
	      archives.append(entry)
	    else:
	      archives.append([entry])

	  class HandlerWrapper(MemcachedZipHandler):
	    """MemcachedZipHandler configured with the factory's arguments."""

	    def get(self, name):
	      # The overrides must be in place before TrueGet runs, because the
	      # caching headers are built from MAX_AGE and PUBLIC while the
	      # response is being formed.
	      self.zipfilenames = archives
	      if max_age is not None:
	        self.MAX_AGE = max_age
	      if public is not None:
	        self.PUBLIC = public
	      self.TrueGet(name)

	  return HandlerWrapper


	class MemcachedZipHandler(webapp.RequestHandler):
	"""Handles get requests for a given URL.

	Serves a GET request from a series of zip files. As files are served they are
	put into memcache, which is much faster than retrieving them from the zip
	source file again. It also uses considerably fewer CPU cycles.
	"""
	zipfile_cache = {} # class cache of source zip files
	MAX_AGE = 600 # max client-side cache lifetime
	PUBLIC = True # public cache setting
	CACHE_PREFIX = 'cache://' # memcache key prefix for actual URLs
	NEG_CACHE_PREFIX = 'noncache://' # memcache key prefix for non-existant URL
	intlString = 'intl/'
	validLangs = ['en', 'de', 'es', 'fr','it','ja','zh-CN','zh-TW']

	def TrueGet(self, reqUri):
	  """The top-level entry point to serving requests.

	  Called 'True' get because it does the work when called from the wrapper
	  class' get method. Some logic is applied to the request to serve files
	  from an intl/<lang>/... directory or fall through to the default language.

	  Args:
	    reqUri: URL requested

	  Returns:
	    None
	  """
	  langName = 'en'
	  resetLangCookie = False
	  isValidIntl = False
	  isStripped = False

	  # Try to retrieve the user's lang pref from the cookie. If there is no
	  # lang pref cookie in the request, add set-cookie to the response with the
	  # default value of 'en'.
	  try:
	    langName = self.request.cookies['android_developer_pref_lang']
	  except KeyError:
	    resetLangCookie = True
	  if langName not in self.validLangs:
	    # The cookie is client-supplied. An unknown code would otherwise be
	    # used to build an /intl/<code>/ redirect that can only end in a 404,
	    # so treat it like a missing cookie.
	    langName = 'en'
	    resetLangCookie = True
	  logging.info('==========================REQ INIT name [%s] langName [%s] resetLangCookie [%s]', reqUri, langName, resetLangCookie)

	  # Preprocess the req url. If it references a directory or the domain
	  # itself, PreprocessUrl has already sent a redirect to the matching
	  # index.html and there is nothing more to do here.
	  name = self.PreprocessUrl(reqUri, langName)
	  if not name:
	    return

	  # Do some prep for handling intl requests. Parse the url and validate
	  # the intl/lang substring, extract the url lang code (urlLangName) and
	  # the uri that follows the intl/lang substring (contentUri).
	  sections = name.split('/', 2)
	  contentUri = None
	  isIntl = len(sections) > 1 and sections[0] == 'intl'
	  if isIntl:
	    isValidIntl = len(sections) > 2 and sections[1] in self.validLangs
	    if isValidIntl:
	      urlLangName = sections[1]
	      contentUri = sections[2]
	      logging.info(' Content URI is [%s]...', contentUri)
	      if urlLangName != langName or langName == 'en':
	        # If the lang code in the request is different from that in
	        # the cookie, or if the target lang is en, strip the
	        # intl/nn substring. It will later be redirected to
	        # the user's preferred language url.
	        name = contentUri
	        isStripped = True
	        isValidIntl = False
	        isIntl = False

	  # Send for processing
	  if self.isCleanUrl(name, langName, isValidIntl, isStripped):
	    # Handle a 'clean' request: try to form a response using the actual
	    # request url.
	    if not self.CreateResponse(name, langName, isValidIntl, resetLangCookie):
	      # CreateResponse returned False, so there was no such document in
	      # the intl/lang tree. Before going to 404, see if there is an
	      # English-language version of the doc in the default tree and
	      # return it, else go to 404.
	      self.CreateResponse(contentUri, langName, False, resetLangCookie)

	  elif isIntl:
	    # Handle the case where we need to pass through an invalid intl req
	    # for processing (so as to get 404 as appropriate). This is needed
	    # because intl urls are passed through clean and retried in English,
	    # if necessary.
	    self.CreateResponse(name, langName, isValidIntl, resetLangCookie)

	  else:
	    # Handle the case where we have a non-clean url (usually a non-intl
	    # url) that we need to interpret in the context of any lang pref
	    # that is set. Prepend an intl/lang string to the request url and
	    # send it as a 302 redirect. After the redirect, the subsequent
	    # request will be handled as a clean url.
	    self.RedirToIntl(name, self.intlString, langName)

	def isCleanUrl(self, name, langName, isValidIntl, isStripped):
	  """Determine whether to pass an incoming url straight to processing.

	  Args:
	    name: The incoming URL
	    langName: The language code in effect for the request
	    isValidIntl: Whether the request is for a valid language-specific url
	    isStripped: Whether an intl/<lang>/ prefix was stripped from the url

	  Returns:
	    True if the URL should be sent straight to processing, False otherwise
	    (always a real boolean, never None).
	  """
	  # English requests that arrived without an intl prefix are served as-is.
	  if langName == 'en' and not isStripped:
	    return True
	  # A valid intl/<lang>/ url is served from the localized tree.
	  if isValidIntl:
	    return True
	  # Only .html documents are negotiated by language.
	  if '.html' not in name:
	    return True
	  # With no language at all there is nothing to redirect to.
	  return not langName

	def PreprocessUrl(self, name, langName):
	  """Normalize the incoming URL, redirecting directory requests.

	  A URL whose last path segment contains no '.' is taken to name a
	  directory. Directory requests, and the empty URL, are redirected to the
	  index.html inside them. Subclasses may override this method to do
	  different preprocessing.

	  Args:
	    name: The incoming URL
	    langName: The language code for the request (not used here)

	  Returns:
	    False if the request was redirected to '/index.html', or
	    The processed URL, otherwise
	  """
	  # A dot-less final segment means a directory: give it its trailing slash
	  if name and not name.endswith('/'):
	    last_segment = name.rsplit('/', 1)[-1]
	    if '.' not in last_segment:
	      name = name + '/'

	  # anything that still names a file is handed back unchanged
	  if name and not name.endswith('/'):
	    return name

	  # a directory or the domain itself: redirect to its index.html
	  self.redirect('/' + name + 'index.html', False)
	  return False

	def RedirToIntl(self, name, intlString, langName):
	  """Redirect an incoming request to the appropriate intl uri.

	  For non-en langName, builds the intl/lang string from a
	  base (en) string and redirects (302) the request to look for
	  a version of the file in langName. For en langName, simply
	  redirects a stripped uri string (intl/nn removed). The request's
	  query string is carried over when there is one.

	  Args:
	    name: The incoming, preprocessed URL
	    intlString: The path prefix of the localized trees (e.g. 'intl/')
	    langName: The language code to redirect to

	  Returns:
	    The lang-specific URL
	  """
	  if langName == 'en':
	    uri = '/' + name
	  else:
	    uri = ''.join(['/', intlString, langName, '/', name])

	  # Append the query only when present, so that no bare '?' is emitted and
	  # the 'en' redirect does not lose it.
	  query = self.request.query_string
	  if query:
	    uri = uri + '?' + query

	  logging.info('-->>REDIRECTING %s to %s', name, uri)
	  self.redirect(uri, False)
	  return uri

	def CreateResponse(self, name, langName, isValidIntl, resetLangCookie):
	  """Process the url and form a response, if appropriate.

	  Attempts to retrieve the requested file (name) from cache,
	  negative cache, or store (zip) and form the response.
	  For intl requests that are not found (in the localized tree),
	  returns False rather than forming a response, so that
	  the request can be retried with the base url (this is the
	  fallthrough to default language).

	  For requests that are found, forms the headers and
	  adds the content to the response entity. If the request was
	  for an intl (localized) url, also resets the language cookie
	  to the language specified in the url if needed, to ensure that
	  the client language and response data remain harmonious.

	  The Content-Type is guessed from the file name; when it cannot be
	  guessed, a small table of known names/extensions is consulted and, as a
	  last resort, 'application/octet-stream' is used so that content which
	  was found is always written to the response.

	  Args:
	    name: The incoming, preprocessed URL
	    langName: The language id. Used as necessary to reset the
	        language cookie in the response.
	    isValidIntl: If present, indicates whether the request is
	        for a language-specific url
	    resetLangCookie: Whether the response should reset the
	        language cookie to 'langName'

	  Returns:
	    True: A response was successfully created for the request
	        (either the content or a 404)
	    False: No response was created.
	  """
	  logging.info('PROCESSING %s langName [%s] isValidIntl [%s] resetLang [%s]',
	               name, langName, isValidIntl, resetLangCookie)

	  # see if we have the page in the memcache
	  resp_data = self.GetFromCache(name)
	  if resp_data is None:
	    logging.info(' Cache miss for %s', name)

	    if self.GetFromNegativeCache(name) is not None:
	      # found it in negative cache
	      self.Write404Error()
	      return True

	    resp_data = self.GetFromStore(name)
	    if resp_data is None:
	      if isValidIntl:
	        # couldn't find the intl doc. Let the caller fall through to
	        # English; nothing is added to the negative cache.
	        return False
	      logging.info(' Adding %s to negative cache, serving 404', name)
	      self.StoreInNegativeCache(name)
	      self.Write404Error()
	      return True

	    # we have the file, put it in the memcache
	    self.StoreOrUpdateInCache(name, resp_data)

	  # found content from cache or store
	  logging.info('FOUND CLEAN')
	  if resetLangCookie:
	    logging.info(' Resetting android_developer_pref_lang cookie to [%s]',
	                 langName)
	    # the cookie expires roughly ten years from now
	    expireDate = time.mktime(localtime()) + 60 * 60 * 24 * 365 * 10
	    self.response.headers.add_header(
	        'Set-Cookie',
	        'android_developer_pref_lang=%s; path=/; expires=%s' %
	        (langName,
	         strftime("%a, %d %b %Y %H:%M:%S", localtime(expireDate))))

	  # revalidate html files -- workaround for cache inconsistencies for
	  # negotiated responses
	  mustRevalidate = '.html' in name
	  if mustRevalidate:
	    self.response.headers.add_header('Vary', 'Cookie')

	  content_type, unused_encoding = mimetypes.guess_type(name)
	  if not content_type:
	    # Types that mimetypes may not know about. Anything else is served as
	    # opaque binary data instead of being silently dropped.
	    content_type = 'application/octet-stream'
	    if name == 'favicon.ico':
	      content_type = 'image/x-icon'
	    else:
	      fallback_types = (
	          ('.psd', 'application/octet-stream'),
	          ('.svg', 'image/svg+xml'),
	          ('.mp4', 'video/mp4'),
	          ('.webm', 'video/webm'),
	      )
	      for suffix, fallback_type in fallback_types:
	        if name.endswith(suffix):
	          content_type = fallback_type
	          break

	  self.response.headers['Content-Type'] = content_type
	  self.SetCachingHeaders(mustRevalidate)
	  self.response.out.write(resp_data)
	  return True

	def GetFromStore(self, file_path):
	  """Retrieve file from zip files.

	  Get the file from the source, it must not have been in the memcache. If
	  possible, we'll use the zip file index to quickly locate where the file
	  should be found. (See MapFileToArchive documentation for assumptions
	  about file ordering.) If we don't have an index or don't find the file
	  where the index says we should, look through all the other zip files, in
	  the order they were configured, to find it.

	  Args:
	    file_path: the file that we're looking for

	  Returns:
	    The contents of the requested file, or None if no archive contains it
	    (including when no archives are configured)
	  """
	  archive_names = [entry[0] for entry in self.zipfilenames]

	  # check the index, if we have one, to see what archive the file is in;
	  # that archive is tried first and is not read a second time afterwards
	  indexed_name = self.MapFileToArchive(file_path)
	  if indexed_name:
	    archive_names = [indexed_name] + [
	        candidate for candidate in archive_names
	        if candidate != indexed_name]

	  for archive_name in archive_names:
	    zip_archive = self.LoadZipFile(archive_name)
	    if not zip_archive:
	      # the archive could not be opened; keep looking in the others
	      continue

	    # we expect some lookups will fail, and that's okay, 404s will deal
	    # with that
	    try:
	      resp_data = zip_archive.read(file_path)
	    except (KeyError, RuntimeError):
	      continue

	    if resp_data is not None:
	      logging.info('%s read from %s', file_path, archive_name)
	      return resp_data

	  return None

	def LoadZipFile(self, zipfilename):
	  """Convenience method to load zip file.

	  Just a convenience method to load the zip file from the data store. This
	  is useful if we ever want to change data stores and also as a means of
	  dependency injection for testing. This method will look at our file cache
	  first, and then load and cache the file if there's a cache miss. Archives
	  that fail to open are not cached, so a later call will try again.

	  Args:
	    zipfilename: the name of the zip file to load

	  Returns:
	    The zip file requested, or None if it cannot be opened (I/O error or
	    not a valid zip archive)
	  """
	  zip_archive = self.zipfile_cache.get(zipfilename)
	  if zip_archive is None:
	    try:
	      zip_archive = zipfile.ZipFile(zipfilename)
	    except (IOError, RuntimeError, zipfile.BadZipfile) as err:
	      # BadZipfile covers corrupt or non-zip files, which would otherwise
	      # escape and fail the whole request instead of yielding None.
	      logging.error('Can\'t open zipfile %s, cause: %s', zipfilename, err)
	    else:
	      self.zipfile_cache[zipfilename] = zip_archive
	  return zip_archive

	def MapFileToArchive(self, file_path):
	  """Given a file name, determine what archive it should be in.

	  Walks the configured archives from last to first and picks the first
	  one whose recorded file name (the second member of its entry) compares
	  as before, or equal to, file_path. Entries without a second member are
	  skipped.

	  This method makes two critical assumptions.
	  (1) The zip files passed as an argument to the handler, if concatenated
	      in that same order, would result in a total ordering
	      of all the files. See (2) for ordering type.
	  (2) Upper case letters before lower case letters. The traversal of a
	      directory tree is depth first. A parent directory's files are added
	      before the files of any child directories

	  Args:
	    file_path: the file to be mapped to an archive

	  Returns:
	    The name of the archive where we expect the file to be, or None if
	    no entry matches
	  """
	  for entry in reversed(self.zipfilenames):
	    if len(entry) <= 1:
	      # no file name recorded for this archive, nothing to compare with
	      continue
	    if self.CompareFilenames(entry[1], file_path) >= 0:
	      return entry[0]

	  return None

	def CompareFilenames(self, file1, file2):
	  """Determines whether file1 is lexicographically 'before' file2.

	  WARNING: This method assumes that paths are output depth-first,
	  with parent directories' files stored before childs'

	  We say that file1 is lexicographically before file2 if the first
	  non-matching path segment of file1 is alphabetically before that of
	  file2. When the paths have a different number of segments and they
	  diverge at (or after) the last segment of the shorter one, the shorter
	  path is first, because files come before directories.

	  Args:
	    file1: the first file path
	    file2: the second file path

	  Returns:
	    A positive number if file1 is before file2
	    A negative number if file2 is before file1
	    0 if filenames are the same
	  """
	  f1_segments = file1.split('/')
	  f2_segments = file2.split('/')
	  shared_len = min(len(f1_segments), len(f2_segments))

	  # skip over the leading segments that both paths have in common
	  segment_ptr = 0
	  while (segment_ptr < shared_len and
	         f1_segments[segment_ptr] == f2_segments[segment_ptr]):
	    segment_ptr += 1

	  # The number of segments differs and we reached the last segment of the
	  # shorter path (or ran past it, when it is a prefix of the longer one):
	  # the one with fewer segments is first. Checking with >= also keeps us
	  # from indexing past the end of the shorter path below.
	  if (len(f1_segments) != len(f2_segments) and
	      segment_ptr + 1 >= shared_len):
	    return len(f2_segments) - len(f1_segments)

	  # we fell off the end, the paths must be the same
	  if segment_ptr == shared_len:
	    return 0

	  # compare the segments where the paths differ
	  if f1_segments[segment_ptr] < f2_segments[segment_ptr]:
	    return 1
	  if f1_segments[segment_ptr] > f2_segments[segment_ptr]:
	    return -1
	  return 0

	def SetCachingHeaders(self, revalidate):
	  """Set the Cache-Control header for the response.

	  The header always carries max-age (from MAX_AGE), is prefixed with
	  'public' when PUBLIC is set, and ends with 'must-revalidate' when
	  requested. No Expires header is emitted.

	  Args:
	    revalidate: Whether to add the must-revalidate directive

	  Returns:
	    None
	  """
	  directives = ['max-age=%d' % self.MAX_AGE]
	  if self.PUBLIC:
	    directives.insert(0, 'public')
	  if revalidate:
	    directives.append('must-revalidate')
	  self.response.headers['Cache-Control'] = ', '.join(directives)

	def GetFromCache(self, filename):
	  """Look a file up in memcache.

	  The memcache key is CACHE_PREFIX followed by the file's URL.

	  Args:
	    filename: The URL of the file to return

	  Returns:
	    Whatever memcache holds for that key
	  """
	  cache_key = '%s%s' % (self.CACHE_PREFIX, filename)
	  return memcache.get(cache_key)

	def StoreOrUpdateInCache(self, filename, data):
	  """Store data in the cache.

	  Store a piece of data in the memcache under CACHE_PREFIX + filename,
	  overwriting any value already stored there. Memcache has a maximum item
	  size of 1*10^6 bytes. If the data is too large, fail, but log the
	  failure. Future work will consider compressing the data before storing
	  or chunking it

	  Args:
	    filename: the name of the file to store
	    data: the data of the file

	  Returns:
	    None
	  """
	  cache_key = '%s%s' % (self.CACHE_PREFIX, filename)
	  try:
	    # set() adds or overwrites in a single call; the previous add-then-
	    # replace pair cost two round trips and could lose the write if the
	    # key was evicted between the two calls.
	    memcache.set(cache_key, data)
	  except ValueError as err:
	    logging.warning('Data size too large to cache\n%s', err)

	def Write404Error(self):
	  """Output a simple 404 response.

	  Sets the response status to 404 and writes a minimal HTML error page
	  as the body.
	  """
	  self.error(404)
	  not_found_page = ('<html><head><title>404: Not Found</title></head>'
	                    '<body><b><h2>Error 404</h2><br/>'
	                    'File not found</b></body></html>')
	  self.response.out.write(not_found_page)

	def StoreInNegativeCache(self, filename):
	  """Record in memcache that a URL does not exist.

	  Adds a marker value (-1) under NEG_CACHE_PREFIX followed by the URL.

	  Future work should consider setting a maximum negative cache size to
	  prevent it from negatively impacting the real cache.

	  Args:
	    filename: URL to add to the negative cache

	  Returns:
	    None
	  """
	  negative_key = '%s%s' % (self.NEG_CACHE_PREFIX, filename)
	  memcache.add(negative_key, -1)

	def GetFromNegativeCache(self, filename):
	  """Look a URL up in the negative cache.

	  The memcache key is NEG_CACHE_PREFIX followed by the URL.

	  Args:
	    filename: URL to retrieve

	  Returns:
	    Whatever memcache holds for that key; StoreInNegativeCache stores
	    the marker -1 for URLs known not to exist.
	  """
	  negative_key = '%s%s' % (self.NEG_CACHE_PREFIX, filename)
	  return memcache.get(negative_key)

	def main():
	  """Build the WSGI application for MemcachedZipHandler and run it.

	  The single route captures the first path segment and the remainder of
	  the path as two separate groups.
	  """
	  routes = [('/([^/]+)/(.*)', MemcachedZipHandler)]
	  util.run_wsgi_app(webapp.WSGIApplication(routes))


	# Script entry point: run the WSGI application only when this file is
	# executed directly, not when it is imported as a module.
	if __name__ == '__main__':
	main()