Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Follow pagination and implement rate throttling #42

Merged
merged 5 commits into from
Feb 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 52 additions & 8 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from __future__ import annotations

import asyncio
import re
import time
import os
from asyncio import Semaphore
from datetime import datetime
from datetime import datetime, timedelta
from enum import Enum
from fnmatch import fnmatch
from sys import argv
Expand Down Expand Up @@ -67,6 +69,21 @@ class PackageResponse(BaseModel):
updated_at: datetime | None


async def wait_for_ratelimit(*, response: Response, eligible_for_secondary_limit: bool = False) -> None:
    """
    Pause when the rate limit reported by a response has been used up.

    Inspects the rate-limit headers of ``response``. When ``x-ratelimit-remaining``
    is zero, sleeps until the moment given by ``x-ratelimit-reset`` (read as a
    Unix timestamp). Otherwise, when ``eligible_for_secondary_limit`` is set,
    sleeps for one second.

    The pauses use ``asyncio.sleep`` so that the event loop is not blocked while
    waiting.

    :param response: Response whose headers are inspected.
    :param eligible_for_secondary_limit: Whether to pause for one second when
        the primary limit is not exhausted.
    :raises KeyError: If the remaining count is zero but the
        ``x-ratelimit-reset`` header is missing.
    """
    # A response is not guaranteed to carry the header; treat a missing header
    # as "limit not exhausted" instead of failing with a KeyError.
    ratelimit_remaining = int(response.headers.get('x-ratelimit-remaining', 1))
    if ratelimit_remaining == 0:
        print('ratelimit exceeded')
        ratelimit_reset = datetime.fromtimestamp(int(response.headers['x-ratelimit-reset']))
        delta = ratelimit_reset - datetime.now()
        # Only sleep when the reset moment is still in the future.
        if delta > timedelta(0):
            print(f'sleeping for {delta}s')
            await asyncio.sleep(delta.total_seconds())
            print('done sleeping')
    elif eligible_for_secondary_limit:
        # https://docs.github.com/en/rest/guides/best-practices-for-integrators#dealing-with-secondary-rate-limits
        await asyncio.sleep(1)


async def list_org_packages(*, org_name: str, http_client: AsyncClient) -> list[PackageResponse]:
"""
List all packages, for an organization.
Expand Down Expand Up @@ -120,11 +137,11 @@ async def list_org_package_versions(
:param http_client: HTTP client.
:return: List of image objects.
"""
response = await http_client.get(
f'{BASE_URL}/orgs/{org_name}/packages/container/{image_name.encoded}/versions?per_page=100'
packages = await get_all_pages(
url=f'{BASE_URL}/orgs/{org_name}/packages/container/{image_name.encoded}/versions?per_page=100',
http_client=http_client,
)
response.raise_for_status()
return [PackageVersionResponse(**i) for i in response.json()]
return [PackageVersionResponse(**i) for i in packages]


async def list_package_versions(*, image_name: ImageName, http_client: AsyncClient) -> list[PackageVersionResponse]:
Expand All @@ -135,9 +152,34 @@ async def list_package_versions(*, image_name: ImageName, http_client: AsyncClie
:param http_client: HTTP client.
:return: List of image objects.
"""
response = await http_client.get(f'{BASE_URL}/user/packages/container/{image_name.encoded}/versions?per_page=100')
response.raise_for_status()
return [PackageVersionResponse(**i) for i in response.json()]
packages = await get_all_pages(
url=f'{BASE_URL}/user/packages/container/{image_name.encoded}/versions?per_page=100', http_client=http_client
)
return [PackageVersionResponse(**i) for i in packages]


async def get_all_pages(*, url: str, http_client: AsyncClient) -> list[dict]:
    """
    Accumulate all pages of a paginated API endpoint.

    Requests ``url``, then keeps following the ``rel="next"`` entry of each
    response's ``Link`` header until a response has no such entry. After every
    page, ``wait_for_ratelimit`` is awaited with that page's response.

    Any exception raised by ``response.raise_for_status()`` propagates to the
    caller.

    :param url: The full API URL of the first page.
    :param http_client: HTTP client.
    :return: List of objects from all pages, in the order they were received.
    """
    result = []
    rel_regex = re.compile(r'<([^<>]*)>; rel="(\w+)"')
    rels = {'next': url}

    while 'next' in rels:
        response = await http_client.get(rels['next'])
        response.raise_for_status()
        result.extend(response.json())

        # The Link header is omitted when the results fit on a single page, so
        # a missing (or empty) header simply means there is no next page.
        link_header = response.headers.get('link') or ''
        rels = {rel: link_url for link_url, rel in rel_regex.findall(link_header)}

        await wait_for_ratelimit(response=response)

    return result


def post_deletion_output(*, response: Response, image_name: ImageName, version_id: int) -> None:
Expand Down Expand Up @@ -176,6 +218,7 @@ async def delete_org_package_versions(
await semaphore.acquire()
try:
response = await http_client.delete(url)
await wait_for_ratelimit(response=response, eligible_for_secondary_limit=True)
post_deletion_output(response=response, image_name=image_name, version_id=version_id)
except TimeoutException as e:
print(f'Request to delete {image_name.value} timed out with error `{e}`')
Expand All @@ -198,6 +241,7 @@ async def delete_package_versions(
await semaphore.acquire()
try:
response = await http_client.delete(url)
await wait_for_ratelimit(response=response, eligible_for_secondary_limit=True)
post_deletion_output(response=response, image_name=image_name, version_id=version_id)
except TimeoutException as e:
print(f'Request to delete {image_name.value} timed out with error `{e}`')
Expand Down
5 changes: 5 additions & 0 deletions main_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@
from main import post_deletion_output

mock_response = Mock()
mock_response.headers = {'x-ratelimit-remaining': '1', 'link': ''}
mock_response.json.return_value = []
mock_response.is_error = False
mock_bad_response = Mock()
mock_bad_response.headers = {'x-ratelimit-remaining': '1', 'link': ''}
mock_bad_response.is_error = True
mock_http_client = AsyncMock()
mock_http_client.get.return_value = mock_response
Expand Down Expand Up @@ -450,11 +452,13 @@ async def test_public_images_with_more_than_5000_downloads(mocker, capsys):
them once at the end, with the necessary context to act on them if wanted.
"""
mock_delete_response = Mock()
mock_delete_response.headers = {'x-ratelimit-remaining': '1', 'link': ''}
mock_delete_response.is_error = True
mock_delete_response.status_code = 400
mock_delete_response.json = lambda: {'message': main.GITHUB_ASSISTANCE_MSG}

mock_list_response = Mock()
mock_list_response.headers = {'x-ratelimit-remaining': '1', 'link': ''}
mock_list_response.is_error = True
mock_list_response.status_code = 400

Expand Down Expand Up @@ -554,6 +558,7 @@ def json(self):
@pytest.mark.asyncio
async def test_outputs_are_set(mocker):
mock_list_response = Mock()
mock_list_response.headers = {'x-ratelimit-remaining': '1', 'link': ''}
mock_list_response.is_error = True
mock_list_response.status_code = 200
mock_list_response.json = lambda: [
Expand Down