finally listen to the maintainer and cache parse_links() by url :)
cosmicexplorer committed Feb 20, 2020
1 parent 09e7cd9 commit acdd06e
Showing 2 changed files with 10 additions and 40 deletions.
19 changes: 2 additions & 17 deletions src/pip/_internal/index/collector.py
@@ -281,12 +281,11 @@ def __init__(self, page):
     def __eq__(self, other):
         # type: (object) -> bool
         return (isinstance(other, type(self)) and
-                self.page.content == other.page.content and
-                self.page.encoding == other.page.encoding)
+                self.page.url == other.page.url)

     def __hash__(self):
         # type: () -> int
-        return hash((self.page.content, self.page.encoding))
+        return hash(self.page.url)


 def with_cached_html_pages(
@@ -371,20 +370,6 @@ def _make_html_page(response):
     return HTMLPage(response.content, encoding=encoding, url=response.url)


-def with_cached_link_fetch(
-    fn,  # type: Callable[..., Optional[HTMLPage]]
-):
-    # type: (...) -> Callable[..., Optional[HTMLPage]]
-
-    @_lru_cache(maxsize=None)
-    def wrapper(link, session=None):
-        # type: (Link, Optional[PipSession]) -> Optional[HTMLPage]
-        return fn(link, session=session)
-
-    return wrapper
-
-
-@with_cached_link_fetch
 def _get_html_page(link, session=None):
     # type: (Link, Optional[PipSession]) -> Optional[HTMLPage]
     if session is None:
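The net effect of the collector.py change: fetched pages are no longer memoized per link, and parse_links() is memoized by page url alone, so two pages with the same url share a single parse even if their bytes differ. A minimal, self-contained sketch of that url-keyed caching pattern (hypothetical names; the real pieces are CacheablePageContent and with_cached_html_pages in the diff above):

from functools import lru_cache


class Page(object):
    """Hypothetical stand-in for HTMLPage: just content plus url."""

    def __init__(self, content, url):
        self.content = content
        self.url = url


class ByUrl(object):
    """Stand-in for CacheablePageContent: equality and hash use the url only."""

    def __init__(self, page):
        self.page = page

    def __eq__(self, other):
        return isinstance(other, type(self)) and self.page.url == other.page.url

    def __hash__(self):
        return hash(self.page.url)


def with_url_cached_parsing(fn):
    # Memoize fn per url via the hashable ByUrl wrapper, mirroring the
    # pattern this commit settles on for parse_links().
    @lru_cache(maxsize=None)
    def cached(key):
        return fn(key.page)

    def wrapper(page):
        return cached(ByUrl(page))

    return wrapper


@with_url_cached_parsing
def parse_links(page):
    # Stand-in parser: split the "document" into tokens.
    return page.content.split()


page_1 = Page('a b c', 'https://example.com/simple/')
page_2 = Page('', 'https://example.com/simple/')  # same url, empty content
assert parse_links(page_1) == ['a', 'b', 'c']
# Cache hit: page_2's empty content is never parsed, which is what the
# renamed test below checks for the real HTMLPage.
assert parse_links(page_2) == ['a', 'b', 'c']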
31 changes: 8 additions & 23 deletions tests/unit/test_collector.py
@@ -1,5 +1,6 @@
 import logging
 import os.path
+import uuid
 from textwrap import dedent

 import mock
@@ -10,7 +11,6 @@
 from pip._vendor.six.moves.urllib import request as urllib_request

 from pip._internal.index.collector import (
-    CacheablePageContent,
     HTMLPage,
     _clean_link,
     _determine_base_url,
@@ -270,15 +270,17 @@ def test_parse_links__yanked_reason(anchor_html, expected):
     page = HTMLPage(
         html_bytes,
         encoding=None,
-        url='https://example.com/simple/',
+        # parse_links() is cached by url, so we inject a random uuid to ensure
+        # the page content isn't cached.
+        url='https://example.com/simple-{}/'.format(uuid.uuid4()),
     )
     links = list(parse_links(page))
     link, = links
     actual = link.yanked_reason
     assert actual == expected


-def test_parse_links_caches_same_page():
+def test_parse_links_caches_same_page_by_url():
     html = (
         # Mark this as a unicode string for Python 2 since anchor_html
         # can contain non-ascii.
@@ -292,8 +294,10 @@ def test_parse_links_caches_same_page():
         encoding=None,
         url='https://example.com/simple/',
     )
+    # Make a second page with zero content, to ensure that it's not accessed,
+    # because the page was cached by url.
     page_2 = HTMLPage(
-        html_bytes,
+        b'',
         encoding=None,
         url='https://example.com/simple/',
     )
@@ -378,25 +382,6 @@ def test_get_html_page_invalid_scheme(caplog, url, vcs_scheme):
     ]


-def test_get_html_page_caches_same_link():
-    link = Link('https://example.com/link-1/')
-    session = mock.Mock(PipSession)
-
-    fake_response = make_fake_html_response(link.url)
-    mock_func = mock.patch("pip._internal.index.collector._get_html_response")
-    with mock_func as mock_func:
-        mock_func.return_value = fake_response
-        page_1 = _get_html_page(link, session=session)
-        mock_func.assert_called_once()
-
-    with mock_func as mock_func:
-        page_2 = _get_html_page(link, session=session)
-        # Assert that the result of the cached html page fetch will also then
-        # be cached by parse_links() and @with_cached_html_pages.
-        assert CacheablePageContent(page_1) == CacheablePageContent(page_2)
-        mock_func.assert_not_called()
-
-
 def make_fake_html_response(url):
     """
     Create a fake requests.Response object.
