Skip to content

Commit

Permalink
Make sure metadata has Requires-Dist and Provides-Extra
Browse files Browse the repository at this point in the history
egg-info distributions may not have the Requires-Dist and Provides-Extra
fields in their metadata. For consistency and to provide an
unsurprising metadata property, we emulate it by reading requires.txt.
  • Loading branch information
sbidoul committed Jun 6, 2022
1 parent e58a8a5 commit 9c119bc
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 91 deletions.
92 changes: 91 additions & 1 deletion src/pip/_internal/metadata/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import csv
import email.message
import functools
import json
import logging
import pathlib
Expand All @@ -13,6 +14,7 @@
Iterable,
Iterator,
List,
NamedTuple,
Optional,
Tuple,
Union,
Expand All @@ -33,6 +35,7 @@
from pip._internal.utils.compat import stdlib_pkgs # TODO: Move definition here.
from pip._internal.utils.egg_link import egg_link_path_from_sys_path
from pip._internal.utils.misc import is_local, normalize_path
from pip._internal.utils.packaging import safe_extra
from pip._internal.utils.urls import url_to_path

if TYPE_CHECKING:
Expand Down Expand Up @@ -91,6 +94,12 @@ def _convert_installed_files_path(
return str(pathlib.Path(*info, *entry))


class RequiresEntry(NamedTuple):
requirement: str
extra: str
marker: str


class BaseDistribution(Protocol):
@classmethod
def from_directory(cls, directory: str) -> "BaseDistribution":
Expand Down Expand Up @@ -348,6 +357,17 @@ def read_text(self, path: InfoPath) -> str:
def iter_entry_points(self) -> Iterable[BaseEntryPoint]:
raise NotImplementedError()

def _metadata_impl(self) -> email.message.Message:
raise NotImplementedError()

@functools.lru_cache(maxsize=1)
def _metadata(self) -> email.message.Message:
# When we drop python 3.7 support, move this to the metadata property and use
# functools.cached_property instead of lru_cache.
metadata = self._metadata_impl()
self._add_egg_info_requires(metadata)
return metadata

@property
def metadata(self) -> email.message.Message:
"""Metadata of distribution parsed from e.g. METADATA or PKG-INFO.
Expand All @@ -357,7 +377,7 @@ def metadata(self) -> email.message.Message:
:raises NoneMetadataError: If the metadata file is available, but does
not contain valid metadata.
"""
raise NotImplementedError()
return self._metadata()

@property
def metadata_version(self) -> Optional[str]:
Expand Down Expand Up @@ -451,6 +471,76 @@ def iter_declared_entries(self) -> Optional[Iterator[str]]:
or self._iter_declared_entries_from_legacy()
)

def _iter_requires_txt_entries(self) -> Iterator[RequiresEntry]:
"""Parse a ``requires.txt`` in an egg-info directory.
This is an INI-ish format where an egg-info stores dependencies. A
section name describes extra other environment markers, while each entry
is an arbitrary string (not a key-value pair) representing a dependency
as a requirement string (no markers).
There is a construct in ``importlib.metadata`` called ``Sectioned`` that
does mostly the same, but the format is currently considered private.
"""
try:
content = self.read_text("requires.txt")
except FileNotFoundError:
return
extra = marker = "" # Section-less entries don't have markers.
for line in content.splitlines():
line = line.strip()
if not line or line.startswith("#"): # Comment; ignored.
continue
if line.startswith("[") and line.endswith("]"): # A section header.
extra, _, marker = line.strip("[]").partition(":")
continue
yield RequiresEntry(requirement=line, extra=extra, marker=marker)

def _iter_egg_info_extras(self) -> Iterable[str]:
"""Get extras from the egg-info directory."""
known_extras = {""}
for entry in self._iter_requires_txt_entries():
if entry.extra in known_extras:
continue
known_extras.add(entry.extra)
yield entry.extra

def _iter_egg_info_dependencies(self) -> Iterable[str]:
"""Get distribution dependencies from the egg-info directory.
To ease parsing, this converts a legacy dependency entry into a PEP 508
requirement string. Like ``_iter_requires_txt_entries()``, there is code
in ``importlib.metadata`` that does mostly the same, but not do exactly
what we need.
Namely, ``importlib.metadata`` does not normalize the extra name before
putting it into the requirement string, which causes marker comparison
to fail because the dist-info format do normalize. This is consistent in
all currently available PEP 517 backends, although not standardized.
"""
for entry in self._iter_requires_txt_entries():
if entry.extra and entry.marker:
marker = f'({entry.marker}) and extra == "{safe_extra(entry.extra)}"'
elif entry.extra:
marker = f'extra == "{safe_extra(entry.extra)}"'
elif entry.marker:
marker = entry.marker
else:
marker = ""
if marker:
yield f"{entry.requirement} ; {marker}"
else:
yield entry.requirement

def _add_egg_info_requires(self, metadata: email.message.Message) -> None:
"""Add egg-info requires.txt information to the metadata."""
if not metadata.get_all("Requires-Dist"):
for dep in self._iter_egg_info_dependencies():
metadata["Requires-Dist"] = dep
if not metadata.get_all("Provides-Extra"):
for extra in self._iter_egg_info_extras():
metadata["Provides-Extra"] = extra


class BaseEnvironment:
"""An environment containing distributions to introspect."""
Expand Down
92 changes: 5 additions & 87 deletions src/pip/_internal/metadata/importlib/_dists.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,7 @@
import os
import pathlib
import zipfile
from typing import (
Collection,
Dict,
Iterable,
Iterator,
Mapping,
NamedTuple,
Optional,
Sequence,
)
from typing import Collection, Dict, Iterable, Iterator, Mapping, Optional, Sequence

from pip._vendor.packaging.requirements import Requirement
from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
Expand Down Expand Up @@ -92,12 +83,6 @@ def read_text(self, filename: str) -> Optional[str]:
return text


class RequiresEntry(NamedTuple):
requirement: str
extra: str
marker: str


class Distribution(BaseDistribution):
def __init__(
self,
Expand Down Expand Up @@ -187,84 +172,17 @@ def iter_entry_points(self) -> Iterable[BaseEntryPoint]:
# importlib.metadata's EntryPoint structure sasitfies BaseEntryPoint.
return self._dist.entry_points

@property
def metadata(self) -> email.message.Message:
def _metadata_impl(self) -> email.message.Message:
return self._dist.metadata

def _iter_requires_txt_entries(self) -> Iterator[RequiresEntry]:
"""Parse a ``requires.txt`` in an egg-info directory.
This is an INI-ish format where an egg-info stores dependencies. A
section name describes extra other environment markers, while each entry
is an arbitrary string (not a key-value pair) representing a dependency
as a requirement string (no markers).
There is a construct in ``importlib.metadata`` called ``Sectioned`` that
does mostly the same, but the format is currently considered private.
"""
content = self._dist.read_text("requires.txt")
if content is None:
return
extra = marker = "" # Section-less entries don't have markers.
for line in content.splitlines():
line = line.strip()
if not line or line.startswith("#"): # Comment; ignored.
continue
if line.startswith("[") and line.endswith("]"): # A section header.
extra, _, marker = line.strip("[]").partition(":")
continue
yield RequiresEntry(requirement=line, extra=extra, marker=marker)

def _iter_egg_info_extras(self) -> Iterable[str]:
"""Get extras from the egg-info directory."""
known_extras = {""}
for entry in self._iter_requires_txt_entries():
if entry.extra in known_extras:
continue
known_extras.add(entry.extra)
yield entry.extra

def iter_provided_extras(self) -> Iterable[str]:
iterator = (
self._dist.metadata.get_all("Provides-Extra")
or self._iter_egg_info_extras()
return (
safe_extra(extra) for extra in self.metadata.get_all("Provides-Extra", [])
)
return (safe_extra(extra) for extra in iterator)

def _iter_egg_info_dependencies(self) -> Iterable[str]:
"""Get distribution dependencies from the egg-info directory.
To ease parsing, this converts a legacy dependency entry into a PEP 508
requirement string. Like ``_iter_requires_txt_entries()``, there is code
in ``importlib.metadata`` that does mostly the same, but not do exactly
what we need.
Namely, ``importlib.metadata`` does not normalize the extra name before
putting it into the requirement string, which causes marker comparison
to fail because the dist-info format do normalize. This is consistent in
all currently available PEP 517 backends, although not standardized.
"""
for entry in self._iter_requires_txt_entries():
if entry.extra and entry.marker:
marker = f'({entry.marker}) and extra == "{safe_extra(entry.extra)}"'
elif entry.extra:
marker = f'extra == "{safe_extra(entry.extra)}"'
elif entry.marker:
marker = entry.marker
else:
marker = ""
if marker:
yield f"{entry.requirement} ; {marker}"
else:
yield entry.requirement

def iter_dependencies(self, extras: Collection[str] = ()) -> Iterable[Requirement]:
req_string_iterator = (
self._dist.metadata.get_all("Requires-Dist")
or self._iter_egg_info_dependencies()
)
contexts: Sequence[Dict[str, str]] = [{"extra": safe_extra(e)} for e in extras]
for req_string in req_string_iterator:
for req_string in self.metadata.get_all("Requires-Dist", []):
req = Requirement(req_string)
if not req.marker:
yield req
Expand Down
3 changes: 1 addition & 2 deletions src/pip/_internal/metadata/pkg_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,7 @@ def iter_entry_points(self) -> Iterable[BaseEntryPoint]:
name, _, value = str(entry_point).partition("=")
yield EntryPoint(name=name.strip(), value=value.strip(), group=group)

@property
def metadata(self) -> email.message.Message:
def _metadata_impl(self) -> email.message.Message:
"""
:raises NoneMetadataError: if the distribution reports `has_metadata()`
True but `get_metadata()` returns None.
Expand Down
29 changes: 28 additions & 1 deletion tests/unit/metadata/test_metadata.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import logging
from pathlib import Path
from typing import cast
from unittest import mock

import pytest
from pip._vendor.packaging.utils import NormalizedName

from pip._internal.metadata import BaseDistribution
from pip._internal.metadata import BaseDistribution, get_directory_distribution
from pip._internal.models.direct_url import DIRECT_URL_METADATA_NAME, ArchiveInfo


Expand Down Expand Up @@ -40,6 +41,32 @@ class FakeDistribution(BaseDistribution):
)


def test_metadata_reads_egg_info_requires_txt(tmp_path: Path) -> None:
"""Check Requires-Dist is obtained from requires.txt if absent in PKG-INFO."""
egg_info_path = tmp_path / "whatever.egg-info"
egg_info_path.mkdir()
dist = get_directory_distribution(str(egg_info_path))
assert dist.installed_with_setuptools_egg_info
pkg_info_path = egg_info_path / "PKG-INFO"
pkg_info_path.write_text("Name: whatever\n")
egg_info_path.joinpath("requires.txt").write_text("pkga\npkgb\n")
assert dist.metadata.get_all("Requires-Dist") == ["pkga", "pkgb"]


def test_metadata_pkg_info_requires_priority(tmp_path: Path) -> None:
"""Check Requires-Dist in PKG-INFO has priority over requires.txt."""
egg_info_path = tmp_path / "whatever.egg-info"
egg_info_path.mkdir()
dist = get_directory_distribution(str(egg_info_path))
assert dist.installed_with_setuptools_egg_info
pkg_info_path = egg_info_path / "PKG-INFO"
pkg_info_path.write_text(
"Name: whatever\nRequires-Dist: pkgc\nRequires-Dist: pkgd\n"
)
egg_info_path.joinpath("requires.txt").write_text("pkga\npkgb\n")
assert dist.metadata.get_all("Requires-Dist") == ["pkgc", "pkgd"]


@mock.patch.object(
BaseDistribution,
"read_text",
Expand Down

0 comments on commit 9c119bc

Please sign in to comment.