finally listen to the maintainer and cache parse_links() by url :)
cosmicexplorer committed Feb 20, 2020
1 parent 09e7cd9 commit acdd06e
Showing 2 changed files with 10 additions and 40 deletions.
19 changes: 2 additions & 17 deletions src/pip/_internal/index/collector.py
@@ -281,12 +281,11 @@ def __init__(self, page):
     def __eq__(self, other):
         # type: (object) -> bool
         return (isinstance(other, type(self)) and
-                self.page.content == other.page.content and
-                self.page.encoding == other.page.encoding)
+                self.page.url == other.page.url)

     def __hash__(self):
         # type: () -> int
-        return hash((self.page.content, self.page.encoding))
+        return hash(self.page.url)


 def with_cached_html_pages(
@@ -371,20 +370,6 @@ def _make_html_page(response):
     return HTMLPage(response.content, encoding=encoding, url=response.url)


-def with_cached_link_fetch(
-    fn,  # type: Callable[..., Optional[HTMLPage]]
-):
-    # type: (...) -> Callable[..., Optional[HTMLPage]]
-
-    @_lru_cache(maxsize=None)
-    def wrapper(link, session=None):
-        # type: (Link, Optional[PipSession]) -> Optional[HTMLPage]
-        return fn(link, session=session)
-
-    return wrapper
-
-
-@with_cached_link_fetch
 def _get_html_page(link, session=None):
     # type: (Link, Optional[PipSession]) -> Optional[HTMLPage]
     if session is None:
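The net effect of the collector.py change: fetched pages are no longer memoized per link, and parse_links() is memoized by page url alone, so two pages with the same url share a single parse even if their bytes differ. A minimal, self-contained sketch of that url-keyed caching pattern (hypothetical names; the real pieces are CacheablePageContent and with_cached_html_pages in the diff above):

from functools import lru_cache


class Page(object):
    """Hypothetical stand-in for HTMLPage: just content plus url."""

    def __init__(self, content, url):
        self.content = content
        self.url = url


class ByUrl(object):
    """Stand-in for CacheablePageContent: equality and hash use the url only."""

    def __init__(self, page):
        self.page = page

    def __eq__(self, other):
        return isinstance(other, type(self)) and self.page.url == other.page.url

    def __hash__(self):
        return hash(self.page.url)


def with_url_cached_parsing(fn):
    # Memoize fn per url via the hashable ByUrl wrapper, mirroring the
    # pattern this commit settles on for parse_links().
    @lru_cache(maxsize=None)
    def cached(key):
        return fn(key.page)

    def wrapper(page):
        return cached(ByUrl(page))

    return wrapper


@with_url_cached_parsing
def parse_links(page):
    # Stand-in parser: split the "document" into tokens.
    return page.content.split()


page_1 = Page('a b c', 'https://example.com/simple/')
page_2 = Page('', 'https://example.com/simple/')  # same url, empty content
assert parse_links(page_1) == ['a', 'b', 'c']
# Cache hit: page_2's empty content is never parsed, which is what the
# renamed test below checks for the real HTMLPage.
assert parse_links(page_2) == ['a', 'b', 'c']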
31 changes: 8 additions & 23 deletions tests/unit/test_collector.py
@@ -1,5 +1,6 @@
 import logging
 import os.path
+import uuid
 from textwrap import dedent

 import mock
@@ -10,7 +11,6 @@
 from pip._vendor.six.moves.urllib import request as urllib_request

 from pip._internal.index.collector import (
-    CacheablePageContent,
     HTMLPage,
     _clean_link,
     _determine_base_url,
@@ -270,15 +270,17 @@ def test_parse_links__yanked_reason(anchor_html, expected):
     page = HTMLPage(
         html_bytes,
         encoding=None,
-        url='https://example.com/simple/',
+        # parse_links() is cached by url, so we inject a random uuid to ensure
+        # the page content isn't cached.
+        url='https://example.com/simple-{}/'.format(uuid.uuid4()),
     )
     links = list(parse_links(page))
     link, = links
     actual = link.yanked_reason
     assert actual == expected


-def test_parse_links_caches_same_page():
+def test_parse_links_caches_same_page_by_url():
     html = (
         # Mark this as a unicode string for Python 2 since anchor_html
         # can contain non-ascii.
@@ -292,8 +294,10 @@ def test_parse_links_caches_same_page():
         encoding=None,
         url='https://example.com/simple/',
     )
+    # Make a second page with zero content, to ensure that it's not accessed,
+    # because the page was cached by url.
     page_2 = HTMLPage(
-        html_bytes,
+        b'',
         encoding=None,
         url='https://example.com/simple/',
     )
@@ -378,25 +382,6 @@ def test_get_html_page_invalid_scheme(caplog, url, vcs_scheme):
     ]


-def test_get_html_page_caches_same_link():
-    link = Link('https://example.com/link-1/')
-    session = mock.Mock(PipSession)
-
-    fake_response = make_fake_html_response(link.url)
-    mock_func = mock.patch("pip._internal.index.collector._get_html_response")
-    with mock_func as mock_func:
-        mock_func.return_value = fake_response
-        page_1 = _get_html_page(link, session=session)
-        mock_func.assert_called_once()
-
-    with mock_func as mock_func:
-        page_2 = _get_html_page(link, session=session)
-        # Assert that the result of the cached html page fetch will also then
-        # be cached by parse_links() and @with_cached_html_pages.
-        assert CacheablePageContent(page_1) == CacheablePageContent(page_2)
-        mock_func.assert_not_called()
-
-
 def make_fake_html_response(url):
     """
     Create a fake requests.Response object.
