| """Download files with progress indicators. |
| """ |
| import cgi |
| import logging |
| import mimetypes |
| import os |
| |
| from pip._vendor import requests |
| from pip._vendor.requests.models import CONTENT_CHUNK_SIZE |
| |
| from pip._internal.cli.progress_bars import DownloadProgressProvider |
| from pip._internal.models.index import PyPI |
| from pip._internal.network.cache import is_from_cache |
| from pip._internal.network.utils import response_chunks |
| from pip._internal.utils.misc import ( |
| format_size, |
| redact_auth_from_url, |
| splitext, |
| ) |
| from pip._internal.utils.typing import MYPY_CHECK_RUNNING |
| |
| if MYPY_CHECK_RUNNING: |
| from typing import Iterable, Optional |
| |
| from pip._vendor.requests.models import Response |
| |
| from pip._internal.models.link import Link |
| from pip._internal.network.session import PipSession |
| |
| logger = logging.getLogger(__name__) |
| |
| |
| def _get_http_response_size(resp): |
| # type: (Response) -> Optional[int] |
| try: |
| return int(resp.headers['content-length']) |
| except (ValueError, KeyError, TypeError): |
| return None |
| |
| |
def _prepare_download(
    resp,  # type: Response
    link,  # type: Link
    progress_bar  # type: str
):
    # type: (...) -> Iterable[bytes]
    """Log the download and return the response body as chunks.

    An INFO message is emitted naming the (auth-redacted) URL, with the
    formatted size appended when the Content-Length is known and non-zero.
    The returned chunk iterable is wrapped by the progress display only
    when all of the following hold: the logger's effective level is INFO
    or lower, the response is not from the cache, and the size is either
    unknown/zero or greater than 40 * 1000.
    """
    total_length = _get_http_response_size(resp)

    # Links hosted on PyPI's file storage domain are logged by their
    # ``show_url``; every other link is logged by its URL minus fragment.
    url = (
        link.show_url
        if link.netloc == PyPI.file_storage_domain
        else link.url_without_fragment
    )

    logged_url = redact_auth_from_url(url)
    if total_length:
        logged_url = '{} ({})'.format(logged_url, format_size(total_length))

    message = "Using cached %s" if is_from_cache(resp) else "Downloading %s"
    logger.info(message, logged_url)

    # Short-circuit evaluation keeps the same order of checks as an
    # if/elif ladder: verbosity first, then cache status, then size.
    show_progress = (
        logger.getEffectiveLevel() <= logging.INFO
        and not is_from_cache(resp)
        and (not total_length or total_length > (40 * 1000))
    )

    chunks = response_chunks(resp, CONTENT_CHUNK_SIZE)
    if show_progress:
        progress_wrapper = DownloadProgressProvider(
            progress_bar, max=total_length
        )
        return progress_wrapper(chunks)
    return chunks
| |
| |
def sanitize_content_filename(filename):
    # type: (str) -> str
    """
    Reduce a Content-Disposition "filename" value to its final path
    component, discarding any directory parts that precede it.
    """
    _directory, base_name = os.path.split(filename)
    return base_name
| |
| |
def parse_content_disposition(content_disposition, default_filename):
    # type: (str, str) -> str
    """
    Extract the "filename" parameter from a Content-Disposition header
    value. ``default_filename`` is returned when the parameter is missing
    or empty, or when nothing is left of it after sanitizing.
    """
    header_params = cgi.parse_header(content_disposition)[1]
    raw_name = header_params.get('filename')
    if not raw_name:
        return default_filename
    # The value comes from the server, so strip any directory parts to
    # prevent directory traversal (e.g. a name containing ".." segments).
    return sanitize_content_filename(raw_name) or default_filename
| |
| |
def _get_http_response_filename(resp, link):
    # type: (Response, Link) -> str
    """Choose a filename for the downloaded response.

    Starts from ``link.filename`` and prefers the name carried by a
    Content-Disposition header when one is present. If the chosen name
    has no extension, one is appended from the Content-Type header, or,
    failing that, from the response URL when it differs from the link URL.
    """
    filename = link.filename  # used unless the headers offer a better name
    disposition = resp.headers.get('content-disposition')
    if disposition:
        filename = parse_content_disposition(disposition, filename)

    ext = splitext(filename)[1]  # type: Optional[str]
    if ext:
        # The name already carries an extension; nothing to add.
        return filename

    # First fallback: derive an extension from the declared media type.
    content_type = resp.headers.get('content-type', '')
    ext = mimetypes.guess_extension(content_type)
    if ext:
        return filename + ext

    # Second fallback: when the response URL is not the link URL, borrow
    # whatever extension the response URL ends with.
    if link.url != resp.url:
        url_ext = os.path.splitext(resp.url)[1]
        if url_ext:
            filename += url_ext
    return filename
| |
| |
| def _http_get_download(session, link): |
| # type: (PipSession, Link) -> Response |
| target_url = link.url.split('#', 1)[0] |
| resp = session.get( |
| target_url, |
| # We use Accept-Encoding: identity here because requests |
| # defaults to accepting compressed responses. This breaks in |
| # a variety of ways depending on how the server is configured. |
| # - Some servers will notice that the file isn't a compressible |
| # file and will leave the file alone and with an empty |
| # Content-Encoding |
| # - Some servers will notice that the file is already |
| # compressed and will leave the file alone and will add a |
| # Content-Encoding: gzip header |
| # - Some servers won't notice anything at all and will take |
| # a file that's already been compressed and compress it again |
| # and set the Content-Encoding: gzip header |
| # By setting this to request only the identity encoding We're |
| # hoping to eliminate the third case. Hopefully there does not |
| # exist a server which when given a file will notice it is |
| # already compressed and that you're not asking for a |
| # compressed file and will then decompress it before sending |
| # because if that's the case I don't think it'll ever be |
| # possible to make this work. |
| headers={"Accept-Encoding": "identity"}, |
| stream=True, |
| ) |
| resp.raise_for_status() |
| return resp |
| |
| |
class Download(object):
    """Plain holder for the response, filename and chunk iterable that
    are passed to the constructor.
    """

    def __init__(
        self,
        response,  # type: Response
        filename,  # type: str
        chunks,  # type: Iterable[bytes]
    ):
        # type: (...) -> None
        # The three values are stored untouched under the same names.
        self.response, self.filename, self.chunks = (
            response, filename, chunks
        )
| |
| |
class Downloader(object):
    """Callable that turns a link into a ``Download``.

    Holds the session used for requests and the progress bar setting
    passed on to ``_prepare_download``.
    """

    def __init__(
        self,
        session,  # type: PipSession
        progress_bar,  # type: str
    ):
        # type: (...) -> None
        self._progress_bar = progress_bar
        self._session = session

    def __call__(self, link):
        # type: (Link) -> Download
        """Request ``link`` and return a ``Download`` for the response.

        An HTTP error from the request is logged at CRITICAL level and
        then re-raised unchanged.
        """
        try:
            response = _http_get_download(self._session, link)
        except requests.HTTPError as exc:
            logger.critical(
                "HTTP error %s while getting %s",
                exc.response.status_code,
                link,
            )
            raise

        # Resolve the filename first, then build the chunk iterable.
        filename = _get_http_response_filename(response, link)
        chunks = _prepare_download(response, link, self._progress_bar)
        return Download(response, filename, chunks)