| # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) |
| # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php |
| # (c) 2005 Ian Bicking, Clark C. Evans and contributors |
| # This module is part of the Python Paste Project and is released under |
| # the MIT License: http://www.opensource.org/licenses/mit-license.php |
| """ |
| This module handles sending static content such as in-memory data or |
| files. At this time it has cache helpers and understands the |
| if-modified-since request header. |
| """ |
| |
| import os, time, mimetypes, zipfile, tarfile |
| from paste.httpexceptions import * |
| from paste.httpheaders import * |
| |
# Files smaller than this many bytes are read fully into memory by
# FileApp.update(); larger files are streamed from disk on each request.
CACHE_SIZE = 4096
# Chunk size (in bytes) used when streaming a file: passed to
# wsgi.file_wrapper and used as the default read size of _FileIter.
BLOCK_SIZE = 4096 * 16

# Public names exported by ``from <this module> import *``.
__all__ = ['DataApp', 'FileApp', 'DirectoryApp', 'ArchiveStore']
| |
class DataApp(object):
    """
    Returns an application that will send content in a single chunk,
    this application has support for setting cache-control and for
    responding to conditional (or HEAD) requests.

    Constructor Arguments:

        ``content``     the content being sent to the client

        ``headers``     the headers to send with the response (a list
                        of ``(name, value)`` tuples, or ``None``)

        ``allowed_methods``  optional override of the request methods
                        this application answers (default GET and HEAD)

        The remaining ``kwargs`` correspond to headers, where the
        underscore is replaced with a dash.  These values are only
        added to the headers if they are not already provided; thus,
        they can be used for default values.  Examples include, but
        are not limited to:

            ``content_type``
            ``content_encoding``
            ``content_location``

    ``cache_control()``

        This method provides validated construction of the ``Cache-Control``
        header as well as providing for automated filling out of the
        ``EXPIRES`` header for HTTP/1.0 clients.

    ``set_content()``

        This method provides a mechanism to set the content after the
        application has been constructed.  This method does things
        like changing ``Last-Modified`` and ``Content-Length`` headers.

    """

    allowed_methods = ('GET', 'HEAD')

    def __init__(self, content, headers=None, allowed_methods=None,
                 **kwargs):
        assert isinstance(headers, (type(None), list))
        self.expires = None
        self.content = None
        self.content_length = None
        self.last_modified = 0
        if allowed_methods is not None:
            self.allowed_methods = allowed_methods
        self.headers = headers or []
        for (k, v) in kwargs.items():
            header = get_header(k)
            header.update(self.headers, v)
        ACCEPT_RANGES.update(self.headers, bytes=True)
        if not CONTENT_TYPE(self.headers):
            CONTENT_TYPE.update(self.headers)
        if content is not None:
            self.set_content(content)

    def cache_control(self, **kwargs):
        """Apply ``Cache-Control`` settings to the headers; returns ``self``."""
        self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
        return self

    def set_content(self, content, last_modified=None):
        """
        Replace the content served by this application.

        ``last_modified`` defaults to the current time.  The stored
        length and the ``Last-Modified`` header are refreshed.
        Returns ``self``.
        """
        assert content is not None
        if last_modified is None:
            self.last_modified = time.time()
        else:
            self.last_modified = last_modified
        self.content = content
        self.content_length = len(content)
        LAST_MODIFIED.update(self.headers, time=self.last_modified)
        return self

    def content_disposition(self, **kwargs):
        """Apply ``Content-Disposition`` settings; returns ``self``."""
        CONTENT_DISPOSITION.apply(self.headers, **kwargs)
        return self

    def __call__(self, environ, start_response):
        method = environ['REQUEST_METHOD'].upper()
        if method not in self.allowed_methods:
            exc = HTTPMethodNotAllowed(
                'You cannot %s a file' % method,
                headers=[('Allow', ','.join(self.allowed_methods))])
            return exc(environ, start_response)
        return self.get(environ, start_response)

    def calculate_etag(self):
        """Return the quoted entity tag built from mtime and length."""
        return '"%s-%s"' % (self.last_modified, self.content_length)

    @staticmethod
    def _not_modified(headers, start_response):
        """Strip entity headers and send an empty 304 response."""
        # horribly inefficient, n^2 performance, yuck!
        for head in list_headers(entity=True):
            head.delete(headers)
        start_response('304 Not Modified', headers)
        return [b'']  # empty body

    @staticmethod
    def _resolve_range(parsed_range, total_length):
        """
        Turn a parsed ``Range`` header into inclusive byte offsets.

        ``parsed_range`` is ``None`` or ``(units, [(first, last), ...])``
        where ``last`` is ``None`` for an open-ended range.  Anything
        other than a single ``bytes`` range selects the whole content.

        Returns ``(lower, upper)``, or ``None`` when the requested range
        cannot be satisfied.
        """
        lower, upper = 0, total_length - 1
        if (parsed_range and 'bytes' == parsed_range[0]
                and 1 == len(parsed_range[1])):
            lower, upper = parsed_range[1][0]
            # Only a *missing* end means "to the end of the content";
            # an explicit end of 0 (``bytes=0-0``) is a one-byte range.
            if upper is None:
                upper = total_length - 1
            if upper >= total_length or lower > upper:
                return None
        return (lower, upper)

    def get(self, environ, start_response):
        """
        Answer a GET/HEAD request, honouring conditional and range headers.

        Returns a WSGI body list when the content is held in memory (or
        for 304/error responses); otherwise returns the tuple
        ``(lower, content_length)`` so a subclass can stream that slice.
        """
        headers = self.headers[:]
        current_etag = self.calculate_etag()
        ETAG.update(headers, current_etag)
        if self.expires is not None:
            EXPIRES.update(headers, delta=self.expires)

        try:
            client_etags = IF_NONE_MATCH.parse(environ)
        except HTTPBadRequest as exce:
            return exce.wsgi_application(environ, start_response)

        if client_etags:
            for etag in client_etags:
                if etag == current_etag or etag == '*':
                    return self._not_modified(headers, start_response)
        else:
            # If we get If-None-Match and If-Modified-Since, and
            # If-None-Match doesn't match, then we should not try to
            # figure out If-Modified-Since (which has 1-second granularity
            # and just isn't as accurate)
            try:
                client_clock = IF_MODIFIED_SINCE.parse(environ)
            except HTTPBadRequest as exce:
                return exce.wsgi_application(environ, start_response)
            if (client_clock is not None
                    and client_clock >= int(self.last_modified)):
                return self._not_modified(headers, start_response)

        requested = RANGE.parse(environ)
        span = self._resolve_range(requested, self.content_length)
        if span is None:
            return HTTPRequestRangeNotSatisfiable((
                "Range request was made beyond the end of the content,\r\n"
                "which is %s long.\r\n  Range: %s\r\n") % (
                    self.content_length, RANGE(environ))
            ).wsgi_application(environ, start_response)
        (lower, upper) = span

        content_length = upper - lower + 1
        CONTENT_RANGE.update(headers, first_byte=lower, last_byte=upper,
                             total_length=self.content_length)
        CONTENT_LENGTH.update(headers, content_length)
        if requested or content_length != self.content_length:
            start_response('206 Partial Content', headers)
        else:
            start_response('200 OK', headers)
        if self.content is not None:
            return [self.content[lower:upper + 1]]
        return (lower, content_length)
| |
class FileApp(DataApp):
    """
    Returns an application that will send the file at the given
    filename.  Adds a mime type based on ``mimetypes.guess_type()``.
    See DataApp for the arguments beyond ``filename``.
    """

    def __init__(self, filename, headers=None, **kwargs):
        self.filename = filename
        content_type, content_encoding = self.guess_type()
        if content_type and 'content_type' not in kwargs:
            kwargs['content_type'] = content_type
        if content_encoding and 'content_encoding' not in kwargs:
            kwargs['content_encoding'] = content_encoding
        DataApp.__init__(self, None, headers, **kwargs)

    def guess_type(self):
        """Return ``(content_type, content_encoding)`` guessed from the name."""
        return mimetypes.guess_type(self.filename)

    def update(self, force=False):
        """
        Refresh cached metadata (and small-file content) from disk.

        Does nothing when the file's mtime is unchanged unless ``force``
        is true.  Files smaller than ``CACHE_SIZE`` are read into memory;
        larger ones only have their size and mtime recorded.
        """
        stat = os.stat(self.filename)
        if not force and stat.st_mtime == self.last_modified:
            return
        self.last_modified = stat.st_mtime
        if stat.st_size < CACHE_SIZE:
            # ``with`` guarantees the handle is closed even if read() fails.
            with open(self.filename, "rb") as fh:
                self.set_content(fh.read(), stat.st_mtime)
        else:
            self.content = None
            self.content_length = stat.st_size
            # This is updated automatically if self.set_content() is
            # called
            LAST_MODIFIED.update(self.headers, time=self.last_modified)

    def get(self, environ, start_response):
        """
        Serve the file, streaming it from disk when it is not cached.

        Returns a WSGI iterable: a list for HEAD, cached, 304 and error
        responses, otherwise a file wrapper or ``_FileIter`` over the
        requested byte range.
        """
        is_head = environ['REQUEST_METHOD'].upper() == 'HEAD'
        # Check existence before update(): os.stat() there would otherwise
        # raise for a vanished file and the 404 would never be produced.
        if not os.path.exists(self.filename):
            exc = HTTPNotFound(
                'The resource does not exist',
                comment="No file at %r" % self.filename)
            return exc(environ, start_response)
        if 'max-age=0' in CACHE_CONTROL(environ).lower():
            self.update(force=True)  # RFC 2616 13.2.6
        else:
            self.update()
        file = None
        if not self.content:
            # Open before start_response is called so that a permission
            # problem can still be reported as a proper 403.
            try:
                file = open(self.filename, 'rb')
            except (IOError, OSError) as e:
                exc = HTTPForbidden(
                    'You are not permitted to view this file (%s)' % e)
                return exc.wsgi_application(
                    environ, start_response)
        streaming = False
        try:
            retval = DataApp.get(self, environ, start_response)
            if is_head:
                return [b'']
            if isinstance(retval, list):
                # cached content, exception, or not-modified
                return retval
            (lower, content_length) = retval
            file.seek(lower)
            streaming = True
        finally:
            # Every path that does not hand the file to the server must
            # close it here, otherwise the descriptor leaks.
            if not streaming and file is not None:
                file.close()
        file_wrapper = environ.get('wsgi.file_wrapper', None)
        # wsgi.file_wrapper reads until EOF, so it is only correct when the
        # requested range runs to the end of the file; otherwise use
        # _FileIter, which stops after ``content_length`` bytes.
        if file_wrapper and lower + content_length >= self.content_length:
            return file_wrapper(file, BLOCK_SIZE)
        return _FileIter(file, size=content_length)
| |
| class _FileIter(object): |
| |
| def __init__(self, file, block_size=None, size=None): |
| self.file = file |
| self.size = size |
| self.block_size = block_size or BLOCK_SIZE |
| |
| def __iter__(self): |
| return self |
| |
| def next(self): |
| chunk_size = self.block_size |
| if self.size is not None: |
| if chunk_size > self.size: |
| chunk_size = self.size |
| self.size -= chunk_size |
| data = self.file.read(chunk_size) |
| if not data: |
| raise StopIteration |
| return data |
| __next__ = next |
| |
| def close(self): |
| self.file.close() |
| |
| |
class DirectoryApp(object):
    """
    Returns an application that dispatches requests to corresponding
    FileApps based on PATH_INFO.  FileApp instances are cached.  This app
    makes sure not to serve any files that are not in a subdirectory.
    To customize FileApp creation override ``DirectoryApp.make_fileapp``
    """

    def __init__(self, path):
        # Store the root with a trailing separator so that the prefix
        # check in __call__ cannot match a sibling such as "/root-other".
        self.path = os.path.abspath(path)
        if not self.path.endswith(os.path.sep):
            self.path += os.path.sep
        assert os.path.isdir(self.path)
        self.cached_apps = {}

    make_fileapp = FileApp

    def __call__(self, environ, start_response):
        # PATH_INFO may legitimately be absent when it would be empty,
        # so do not index the environ directly.
        path_info = environ.get('PATH_INFO', '')
        app = self.cached_apps.get(path_info)
        if app is None:
            path = os.path.join(self.path, path_info.lstrip('/'))
            if not os.path.normpath(path).startswith(self.path):
                # Normalised path escapes the served directory.
                app = HTTPForbidden()
            elif os.path.isfile(path):
                app = self.make_fileapp(path)
                self.cached_apps[path_info] = app
            else:
                app = HTTPNotFound(comment=path)
        return app(environ, start_response)
| |
| |
class _TarMemberInfo(object):
    """Minimal ``zipfile.ZipInfo`` look-alike describing one tar member."""

    def __init__(self, member):
        name = member.name
        # ZipInfo marks directories with a trailing slash; mirror that so
        # ArchiveStore can detect directories the same way for both formats.
        if member.isdir() and not name.endswith("/"):
            name += "/"
        self.filename = name
        self.file_size = member.size
        # Local time, because ArchiveStore feeds this to time.mktime().
        self.date_time = tuple(time.localtime(member.mtime)[:6])


class _TarArchive(object):
    """Expose the ``getinfo``/``read`` subset of the ZipFile API for a tar."""

    def __init__(self, filepath):
        self._tar = tarfile.open(filepath, "r")

    def getinfo(self, name):
        # TarFile.getmember raises KeyError for unknown names, like ZipFile.
        return _TarMemberInfo(self._tar.getmember(name))

    def read(self, name):
        fh = self._tar.extractfile(name)
        if fh is None:
            # Member exists but is not something with readable content.
            raise KeyError("%r is not a regular file" % name)
        try:
            return fh.read()
        finally:
            fh.close()


class ArchiveStore(object):
    """
    Returns an application that serves up a DataApp for items requested
    in a given zip or tar archive.

    Constructor Arguments:

        ``filepath``    the path to the archive being served

    ``cache_control()``

        This method provides validated construction of the ``Cache-Control``
        header as well as providing for automated filling out of the
        ``EXPIRES`` header for HTTP/1.0 clients.
    """

    def __init__(self, filepath):
        if zipfile.is_zipfile(filepath):
            self.archive = zipfile.ZipFile(filepath, "r")
        elif tarfile.is_tarfile(filepath):
            # TarFileCompat only exists on Python 2; fall back to the
            # local adapter where it has been removed.
            compat = getattr(tarfile, "TarFileCompat", None)
            if compat is not None:
                self.archive = compat(filepath, "r")
            else:
                self.archive = _TarArchive(filepath)
        else:
            raise AssertionError("filepath '%s' is not a zip or tar " % filepath)
        # cache_control() writes into this list; it must exist up front.
        self.headers = []
        self.expires = None
        self.last_modified = time.time()
        self.cache = {}

    def cache_control(self, **kwargs):
        """
        Apply ``Cache-Control`` settings for applications created later.

        Entries already cached keep the headers they were built with.
        Returns ``self``.
        """
        self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
        return self

    def __call__(self, environ, start_response):
        path = environ.get("PATH_INFO", "")
        if path.startswith("/"):
            path = path[1:]
        application = self.cache.get(path)
        if application:
            return application(environ, start_response)
        try:
            info = self.archive.getinfo(path)
        except KeyError:
            exc = HTTPNotFound("The file requested, '%s', was not found." % path)
            return exc.wsgi_application(environ, start_response)
        if info.filename.endswith("/"):
            exc = HTTPNotFound("Path requested, '%s', is not a file." % path)
            return exc.wsgi_application(environ, start_response)
        try:
            content = self.archive.read(path)
        except KeyError:
            # Listed in the archive but has no readable content.
            exc = HTTPNotFound("The file requested, '%s', was not found." % path)
            return exc.wsgi_application(environ, start_response)
        content_type, content_encoding = mimetypes.guess_type(info.filename)
        header_defaults = {'content_type': content_type}
        # 'None' is not a valid content-encoding, so don't set the header if
        # mimetypes.guess_type returns None
        if content_encoding is not None:
            header_defaults['content_encoding'] = content_encoding
        # Each app gets its own copy of the store-wide headers.
        app = DataApp(None, list(self.headers), **header_defaults)
        app.set_content(content,
                        time.mktime(tuple(info.date_time) + (0, 0, 0)))
        self.cache[path] = app
        app.expires = self.expires
        return app(environ, start_response)
| |