Spaces:
Runtime error
Runtime error
| import logging | |
| import mimetypes | |
| import os | |
| from collections import defaultdict | |
| from typing import Callable, Dict, Iterable, List, Optional, Tuple | |
| from pip._vendor.packaging.utils import ( | |
| InvalidSdistFilename, | |
| InvalidVersion, | |
| InvalidWheelFilename, | |
| canonicalize_name, | |
| parse_sdist_filename, | |
| parse_wheel_filename, | |
| ) | |
| from pip._internal.models.candidate import InstallationCandidate | |
| from pip._internal.models.link import Link | |
| from pip._internal.utils.urls import path_to_url, url_to_path | |
| from pip._internal.vcs import is_url | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Type aliases used in the signatures of the link sources below.
# An iterable of installation candidates.
FoundCandidates = Iterable[InstallationCandidate]
# An iterable of links.
FoundLinks = Iterable[Link]
# Callable that takes a link and returns the candidates obtained from it.
CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]
# Callable that takes a link and returns a bool verdict for it.
PageValidator = Callable[[Link], bool]
class LinkSource:
    """Base interface for a source of links.

    The methods here only raise ``NotImplementedError``; subclasses are
    expected to override each of them.
    """

    def link(self) -> Optional[Link]:
        """Return the link this source wraps, or ``None`` if it has none."""
        raise NotImplementedError

    def page_candidates(self) -> FoundCandidates:
        """Return candidates found by parsing an archive listing HTML file."""
        raise NotImplementedError

    def file_links(self) -> FoundLinks:
        """Return links found by specifying archives directly."""
        raise NotImplementedError
def _is_html_file(file_url: str) -> bool:
    """Tell whether ``file_url`` is guessed to be an HTML document.

    The decision is based solely on the MIME type that ``mimetypes``
    guesses from the URL (non-strict mode, so non-standard but commonly
    used types are considered too); the file content is never read.
    """
    guessed_type, _encoding = mimetypes.guess_type(file_url, strict=False)
    return guessed_type == "text/html"
class _FlatDirectoryToUrls:
    """Scan a directory once and cache the URLs found in it.

    The scan is lazy: nothing is read from disk until one of the two
    public accessors is used for the first time. Both accessors are
    properties because ``_FlatDirectorySource`` reads them as plain
    attributes (it iterates ``page_candidates`` and subscripts
    ``project_name_to_urls``) instead of calling them.
    """

    def __init__(self, path: str) -> None:
        """Remember ``path``; the directory itself is not touched yet."""
        self._path = path
        # URLs of the HTML files found in the directory.
        self._page_candidates: List[str] = []
        # Project name (as parsed from the file name) -> URLs of its archives.
        # A defaultdict, so looking up an unknown project yields an empty list.
        self._project_name_to_urls: Dict[str, List[str]] = defaultdict(list)
        self._scanned_directory = False

    def _scan_directory(self) -> None:
        """Scan the directory once, filling both caches in a single pass.

        HTML files are recorded as page candidates. Every other entry is
        kept only if its name parses as a wheel or, failing that, as an
        sdist file name; it is then stored under the parsed project name.
        """
        for entry in os.scandir(self._path):
            url = path_to_url(entry.path)
            if _is_html_file(url):
                self._page_candidates.append(url)
                continue

            # File must have a valid wheel or sdist name,
            # otherwise not worth considering as a package
            try:
                project_filename = parse_wheel_filename(entry.name)[0]
            except (InvalidWheelFilename, InvalidVersion):
                try:
                    project_filename = parse_sdist_filename(entry.name)[0]
                except (InvalidSdistFilename, InvalidVersion):
                    continue

            self._project_name_to_urls[project_filename].append(url)
        # Only flagged once the whole directory has been walked.
        self._scanned_directory = True

    @property
    def page_candidates(self) -> List[str]:
        """URLs of the HTML files in the directory (scans on first use)."""
        if not self._scanned_directory:
            self._scan_directory()

        return self._page_candidates

    @property
    def project_name_to_urls(self) -> Dict[str, List[str]]:
        """Mapping of project name to archive URLs (scans on first use)."""
        if not self._scanned_directory:
            self._scan_directory()

        return self._project_name_to_urls
class _FlatDirectorySource(LinkSource):
    """Link source specified by ``--find-links=<path-to-dir>``.

    This looks at the content of the directory, and returns:

    * ``page_candidates``: Links listed on each HTML file in the directory.
    * ``file_links``: Archives in the directory that belong to the project.
    """

    # Shared by every instance: one directory scanner per path, so the same
    # directory is not scanned again for each new source built on it.
    _paths_to_urls: Dict[str, _FlatDirectoryToUrls] = {}

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        path: str,
        project_name: str,
    ) -> None:
        """Bind the source to ``path`` and to the canonical project name."""
        self._candidates_from_page = candidates_from_page
        self._project_name = canonicalize_name(project_name)

        # Reuse the scanner already registered for this path, or register
        # a fresh one in the class-level cache.
        scanner = self._paths_to_urls.get(path)
        if scanner is None:
            scanner = _FlatDirectoryToUrls(path=path)
            self._paths_to_urls[path] = scanner
        self._path_to_urls = scanner

    def link(self) -> Optional[Link]:
        """A flat directory has no single underlying link."""
        return None

    def page_candidates(self) -> FoundCandidates:
        """Yield the candidates produced from each HTML file of the directory."""
        for page_url in self._path_to_urls.page_candidates:
            page_link = Link(page_url)
            yield from self._candidates_from_page(page_link)

    def file_links(self) -> FoundLinks:
        """Yield a link for every archive recorded under this project's name."""
        archive_urls = self._path_to_urls.project_name_to_urls[self._project_name]
        yield from (Link(archive_url) for archive_url in archive_urls)
class _LocalFileSource(LinkSource):
    """``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.

    If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
    the option, it is converted to a URL first. This returns:

    * ``page_candidates``: Links listed on an HTML file.
    * ``file_links``: The file itself, when it is not an HTML file.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        """Store the page parser and the link to the local file."""
        self._candidates_from_page = candidates_from_page
        self._link = link

    def link(self) -> Optional[Link]:
        """Return the link to the local file."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates parsed from the file, but only if it is HTML."""
        if _is_html_file(self._link.url):
            yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Yield the file's own link, unless the file is an HTML page."""
        if not _is_html_file(self._link.url):
            yield self._link
class _RemoteFileSource(LinkSource):
    """``--find-links=<url>`` or ``--[extra-]index-url=<url>``.

    This returns:

    * ``page_candidates``: Links listed on the page, when the page validator
      accepts the link.
    * ``file_links``: The link itself, unconditionally.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        page_validator: PageValidator,
        link: Link,
    ) -> None:
        """Store the page parser, the page validator and the remote link."""
        self._candidates_from_page = candidates_from_page
        self._page_validator = page_validator
        self._link = link

    def link(self) -> Optional[Link]:
        """Return the remote link."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates parsed from the link if the validator accepts it."""
        if self._page_validator(self._link):
            yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Yield the remote link itself."""
        yield self._link
class _IndexDirectorySource(LinkSource):
    """``--[extra-]index-url=<path-to-directory>``.

    This is treated like a remote URL; ``candidates_from_page`` contains logic
    for this by appending ``index.html`` to the link.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        """Store the page parser and the link to the index directory."""
        self._candidates_from_page = candidates_from_page
        self._link = link

    def link(self) -> Optional[Link]:
        """Return the link to the index directory."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield whatever ``candidates_from_page`` produces for the link."""
        for candidate in self._candidates_from_page(self._link):
            yield candidate

    def file_links(self) -> FoundLinks:
        """An index directory contributes no direct file links."""
        return ()
def build_source(
    location: str,
    *,
    candidates_from_page: CandidatesFromPage,
    page_validator: PageValidator,
    expand_dir: bool,
    cache_link_parsing: bool,
    project_name: str,
) -> Tuple[Optional[str], Optional[LinkSource]]:
    """Turn a user-supplied location into a ``(url, source)`` pair.

    :param location: An existing local path, a ``file:`` URL, or any other
        string accepted by ``is_url``.
    :param candidates_from_page: Passed on to whichever source is built.
    :param page_validator: Passed on to the remote source only.
    :param expand_dir: For a local directory, choose the flat-directory
        source when true and the index-directory source otherwise.
    :param cache_link_parsing: Passed on to every ``Link`` created here.
    :param project_name: Passed on to the flat-directory source.
    :return: ``(None, None)`` when the location is not recognised at all,
        ``(url, None)`` when it maps to a local path that is neither a file
        nor a directory, and ``(url, source)`` otherwise.
    """
    url: str
    path: Optional[str]

    # Classify the location; an existing local path wins over URL parsing.
    if os.path.exists(location):  # Is a local path.
        url, path = path_to_url(location), location
    elif location.startswith("file:"):  # A file: URL.
        url, path = location, url_to_path(location)
    elif is_url(location):
        url, path = location, None
    else:
        logger.warning(
            "Location '%s' is ignored: "
            "it is either a non-existing path or lacks a specific scheme.",
            location,
        )
        return (None, None)

    def make_link() -> Link:
        # Built on demand: the flat-directory source does not take a link.
        return Link(url, cache_link_parsing=cache_link_parsing)

    # No local path behind the URL: treat it as remote.
    if path is None:
        remote_source: LinkSource = _RemoteFileSource(
            candidates_from_page=candidates_from_page,
            page_validator=page_validator,
            link=make_link(),
        )
        return (url, remote_source)

    if os.path.isdir(path):
        directory_source: LinkSource
        if expand_dir:
            directory_source = _FlatDirectorySource(
                candidates_from_page=candidates_from_page,
                path=path,
                project_name=project_name,
            )
        else:
            directory_source = _IndexDirectorySource(
                candidates_from_page=candidates_from_page,
                link=make_link(),
            )
        return (url, directory_source)

    if os.path.isfile(path):
        file_source: LinkSource = _LocalFileSource(
            candidates_from_page=candidates_from_page,
            link=make_link(),
        )
        return (url, file_source)

    logger.warning(
        "Location '%s' is ignored: it is neither a file nor a directory.",
        location,
    )
    return (url, None)