Merge pull request #113 from lean-dojo/dev
clean up network requests
Kaiyu Yang authored Dec 14, 2023
2 parents d768c06 + 389fe3b commit a318a7a
Showing 2 changed files with 11 additions and 7 deletions.
4 changes: 2 additions & 2 deletions src/lean_dojo/data_extraction/lean.py
@@ -39,6 +39,8 @@
@cache
def _to_commit_hash(repo: Repository, label: str) -> str:
"""Convert a tag or branch to a commit hash."""
logger.debug(f"Querying the commit hash for {repo.name} {label}")

for branch in repo.get_branches():
if branch.name == label:
return branch.commit.sha
@@ -402,7 +404,6 @@ def __post_init__(self) -> None:
if (self.url, self.commit) in info_cache.tag2commit:
commit = info_cache.tag2commit[(self.url, self.commit)]
else:
logger.debug(f"Querying the commit hash for {self.name} {self.commit}")
commit = _to_commit_hash(self.repo, self.commit)
assert _COMMIT_REGEX.fullmatch(commit), f"Invalid commit hash: {commit}"
info_cache.tag2commit[(self.url, self.commit)] = commit
@@ -573,7 +574,6 @@ def _parse_lakefile_dependencies(
elif len(rev) == 40 and _COMMIT_REGEX.fullmatch(rev):
commit = rev
else:
logger.debug(f"Querying the commit hash for {url} {rev}")
commit = _to_commit_hash(url_to_repo(url), rev)
assert _COMMIT_REGEX.fullmatch(commit)

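The lean.py change moves the debug message into `_to_commit_hash` itself, which is memoized with `@cache`, so each repo/label pair is queried and logged at most once while the two call sites stay silent. Below is a minimal, self-contained sketch of that pattern; the helper name `to_commit_hash` and the use of plain string keys (instead of the `Repository` object the diff caches on) are illustrative simplifications, not LeanDojo's exact code.

```python
from functools import cache

from github import Github  # PyGithub
from loguru import logger

GITHUB = Github()  # anonymous client; pass a token for higher rate limits


@cache
def to_commit_hash(full_name: str, label: str) -> str:
    """Resolve a branch or tag ``label`` of ``full_name`` to a commit SHA."""
    # The log line sits next to the network request, so callers stay quiet
    # and cache hits produce no duplicate messages.
    logger.debug(f"Querying the commit hash for {full_name} {label}")
    repo = GITHUB.get_repo(full_name)

    for branch in repo.get_branches():
        if branch.name == label:
            return branch.commit.sha

    for tag in repo.get_tags():
        if tag.name == label:
            return tag.commit.sha

    raise ValueError(f"{label} is not a branch or tag of {full_name}")
```

Keying the cache on strings keeps the sketch independent of PyGithub object hashability; otherwise the behavior mirrors the `_to_commit_hash` shown in the hunk above.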
14 changes: 9 additions & 5 deletions src/lean_dojo/utils.py
@@ -16,7 +16,6 @@
from github.Repository import Repository
from ray.util.actor_pool import ActorPool
from typing import Tuple, Union, List, Generator, Optional
from functools import cache

from .constants import GITHUB, NUM_WORKERS, TMP_DIR

@@ -199,8 +198,9 @@ def remove_optional_type(tp: type) -> type:


@cache
def read_url(url: str, num_retries: int = 1) -> str:
def read_url(url: str, num_retries: int = 2) -> str:
"""Read the contents of the URL ``url``. Retry if failed"""
backoff = 1
while True:
try:
with urllib.request.urlopen(url) as f:
@@ -210,7 +210,8 @@ def read_url(url: str, num_retries: int = 1) -> str:
raise ex
num_retries -= 1
logger.debug(f"Request to {url} failed. Retrying...")
time.sleep(2 - num_retries)
time.sleep(backoff)
backoff *= 2


@cache
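In utils.py, `read_url` gains one more retry by default and replaces the fixed `time.sleep(2 - num_retries)` with an exponentially growing delay. A self-contained sketch of that retry loop follows; the function name, the exception type, the decode step, and the exhausted-retries check are assumptions filled in where the hunk is truncated.

```python
import time
import urllib.error
import urllib.request
from functools import cache

from loguru import logger


@cache
def fetch_with_backoff(url: str, num_retries: int = 2) -> str:
    """Read ``url``, retrying transient failures with exponential backoff."""
    backoff = 1
    while True:
        try:
            with urllib.request.urlopen(url) as f:
                return f.read().decode()
        except urllib.error.URLError as ex:
            if num_retries <= 0:
                raise ex  # retries exhausted; let the error propagate
            num_retries -= 1
            logger.debug(f"Request to {url} failed. Retrying in {backoff}s ...")
            time.sleep(backoff)
            backoff *= 2  # 1 s, 2 s, 4 s, ...
```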
@@ -246,8 +247,10 @@ def normalize_url(url: str) -> str:


@cache
def url_to_repo(url: str, num_retries: int = 1) -> Repository:
def url_to_repo(url: str, num_retries: int = 2) -> Repository:
url = normalize_url(url)
backoff = 1

while True:
try:
return GITHUB.get_repo("/".join(url.split("/")[-2:]))
@@ -256,7 +259,8 @@ def url_to_repo(url: str, num_retries: int = 1) -> Repository:
raise ex
num_retries -= 1
logger.debug(f'url_to_repo("{url}") failed. Retrying...')
time.sleep(2 - num_retries)
time.sleep(backoff)
backoff *= 2


@cache
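`url_to_repo` receives the same treatment: normalize the URL, try the GitHub API, and on failure sleep for 1 s, then 2 s, before giving up after the (now two) retries. Combined with the existing `@cache` decorators, repeated lookups of the same URL cost a single network round trip. A hypothetical usage sketch, assuming the module is importable as `lean_dojo.utils` and with illustrative repository/file URLs:

```python
from lean_dojo.utils import read_url, url_to_repo

# Identical arguments hit functools.cache: the second call never touches the network.
repo1 = url_to_repo("https://github.com/leanprover-community/mathlib4")
repo2 = url_to_repo("https://github.com/leanprover-community/mathlib4")
assert repo1 is repo2

# A flaky fetch is retried (sleeping 1 s, then 2 s) before the error propagates.
toolchain = read_url(
    "https://raw.githubusercontent.com/leanprover-community/mathlib4/master/lean-toolchain"
)
print(toolchain.strip())
```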
