Worker fixes for deletion #3130

Merged (2 commits, Aug 28, 2024)
packages/openneuro-app/src/scripts/datalad/mutations/delete.jsx

@@ -32,7 +32,7 @@
           },
         })
         window.location.replace(
-          `${window.location.origin}/dashboard/datasets`,
+          `${window.location.origin}/search`,
         )
       })}
     >
packages/openneuro-app/src/scripts/dataset/mutations/delete.jsx

@@ -32,7 +32,7 @@
           },
         })
         window.location.replace(
-          `${window.location.origin}/dashboard/datasets`,
+          `${window.location.origin}/search`,
         )
       })}
     >
services/datalad/datalad_service/handlers/dataset.py (19 changes: 8 additions & 11 deletions)

@@ -1,6 +1,6 @@
 import asyncio
-import os
 
+import aiofiles.os
 import falcon
 import pygit2

@@ -19,7 +19,7 @@

     async def on_get(self, req, resp, dataset):
         ds_path = self.store.get_dataset_path(dataset)
-        if (os.path.isdir(ds_path)):
+        if await aiofiles.os.path.isdir(ds_path):
             dataset_description = {
                 'accession_number': dataset,
             }
@@ -32,7 +32,7 @@

     async def on_post(self, req, resp, dataset):
         ds_path = self.store.get_dataset_path(dataset)
-        if (os.path.isdir(ds_path)):
+        if await aiofiles.os.path.isdir(ds_path):
             resp.media = {'error': 'dataset already exists'}
             resp.status = falcon.HTTP_CONFLICT
         else:
@@ -48,15 +48,12 @@

     async def on_delete(self, req, resp, dataset):
         dataset_path = self.store.get_dataset_path(dataset)
-        async def async_delete():
-            await delete_siblings(dataset)
-            await delete_dataset(dataset_path)
-
-        try:
-            # Don't block before responding
-            asyncio.run_task(async_delete())
+        if await aiofiles.os.path.exists(dataset_path):
+            await asyncio.gather(delete_siblings(dataset), delete_dataset(dataset_path))
             resp.media = {}
             resp.status = falcon.HTTP_OK
-        except:
-            resp.media = {'error': 'dataset not found'}
+        else:
+            resp.media = {'error': 'dataset does not exist'}
             resp.status = falcon.HTTP_NOT_FOUND
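
Reviewer note: `asyncio.run_task` is not part of asyncio's public API (the standard spellings are `asyncio.create_task` or `loop.create_task`), so the old handler would raise, land in the bare `except`, and answer 404 even when the dataset existed. A minimal sketch of the pattern this hunk moves to; `store` and the two delete coroutines are stand-ins for the real datalad_service implementations:

```python
import asyncio

import aiofiles.os
import falcon


async def delete_siblings(dataset_id):
    """Stand-in for tasks/publish.py: remove S3 and GitHub remotes."""


async def delete_dataset(dataset_path):
    """Stand-in: remove the local dataset repository."""


class DatasetResource:
    def __init__(self, store):
        self.store = store

    async def on_delete(self, req, resp, dataset):
        dataset_path = self.store.get_dataset_path(dataset)
        # aiofiles.os runs the blocking os call in a worker thread,
        # so the event loop keeps serving other requests meanwhile.
        if await aiofiles.os.path.exists(dataset_path):
            # gather() awaits both coroutines concurrently and
            # propagates the first exception either one raises.
            await asyncio.gather(
                delete_siblings(dataset),
                delete_dataset(dataset_path),
            )
            resp.media = {}
            resp.status = falcon.HTTP_OK
        else:
            resp.media = {'error': 'dataset does not exist'}
            resp.status = falcon.HTTP_NOT_FOUND
```

One behavioral change to keep in mind: the response now waits for both deletions to finish instead of returning immediately, so slow sibling cleanup lengthens the request.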
services/datalad/datalad_service/tasks/publish.py (27 changes: 13 additions & 14 deletions)

@@ -2,6 +2,7 @@
 import logging
 import os.path
 import re
+from concurrent.futures import ProcessPoolExecutor
 
 import pygit2
 import boto3
@@ -24,6 +25,9 @@
 logger = logging.getLogger('datalad_service.' + __name__)
 
 
+delete_executor = ProcessPoolExecutor(4)
+
+
 def github_sibling(dataset_path, dataset_id):
     """
     Find a GitHub remote or create a new repo and configure the remote.
@@ -110,7 +114,13 @@
     return remote_id_A == remote_id_B and tree_id_A == tree_id_B
 
 
-async def delete_s3_sibling(dataset_id):
+def delete_s3_sibling(dataset_id):
+    """Run S3 sibling deletion in another process to avoid blocking any callers"""
+    delete_executor.submit(delete_s3_sibling_executor, dataset_id)
+
+
+def delete_s3_sibling_executor(dataset_id):
     """Delete all versions of a dataset from S3."""
     try:
         client = boto3.client(
             's3',
@@ -124,8 +134,6 @@
             versions.extend(response.get('DeleteMarkers', []))
             object_delete_list.extend(
                 [{'VersionId': version['VersionId'], 'Key': version['Key']} for version in versions])
-            # Yield after each request
-            await asyncio.sleep(0)
         for i in range(0, len(object_delete_list), 1000):
             client.delete_objects(
                 Bucket=get_s3_bucket(),
@@ -134,8 +142,6 @@
                     'Quiet': True
                 }
             )
-            # Yield after each request
-            await asyncio.sleep(0)
     except Exception as e:
         raise Exception(
             f'Attempt to delete dataset {dataset_id} from {get_s3_remote()} has failed. ({e})')
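
For reference, the same batched, versioned delete as a standalone sketch (bucket and prefix are placeholders): in a versioned bucket, deleting the "current" object only writes a delete marker, so every `Version` and `DeleteMarker` must be removed explicitly, and `delete_objects` accepts at most 1000 entries per request.

```python
import boto3


def delete_all_versions(bucket: str, prefix: str) -> None:
    """Delete every object version and delete marker under a prefix."""
    client = boto3.client('s3')
    to_delete = []
    # The paginator follows the 1000-key pages that
    # list_object_versions returns.
    paginator = client.get_paginator('list_object_versions')
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        for version in page.get('Versions', []) + page.get('DeleteMarkers', []):
            to_delete.append(
                {'Key': version['Key'], 'VersionId': version['VersionId']})
    # delete_objects caps each request at 1000 objects.
    for i in range(0, len(to_delete), 1000):
        client.delete_objects(
            Bucket=bucket,
            Delete={'Objects': to_delete[i:i + 1000], 'Quiet': True},
        )
```

Example call, with a hypothetical bucket and dataset prefix: `delete_all_versions('openneuro-example-bucket', 'ds000001/')`.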
@@ -156,15 +162,8 @@


 async def delete_siblings(dataset_id):
-    try:
-        await delete_s3_sibling(dataset_id)
-    except:
-        pass
-    await asyncio.sleep(0)
-    try:
-        await delete_github_sibling(dataset_id)
-    except:
-        pass
+    delete_s3_sibling(dataset_id)
+    await delete_github_sibling(dataset_id)


 def monitor_remote_configs(dataset_path):
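
The shape of this fix, distilled: boto3 is synchronous, so the old `await asyncio.sleep(0)` calls only yielded between S3 requests rather than making the requests themselves non-blocking, and the whole S3 deletion now runs off the event loop in a process pool. `delete_siblings` also stops swallowing exceptions, letting GitHub-sibling failures propagate to the handler's `asyncio.gather`. A minimal sketch of the fire-and-forget offload pattern (names are illustrative):

```python
import time
from concurrent.futures import ProcessPoolExecutor

# One module-level pool, mirroring delete_executor above; submitted
# functions must be module-level so they can be pickled.
executor = ProcessPoolExecutor(4)


def blocking_cleanup(dataset_id: str) -> None:
    """Stand-in for the synchronous boto3 work."""
    time.sleep(5)
    print(f'cleaned up {dataset_id}')


def schedule_cleanup(dataset_id: str) -> None:
    # submit() returns a Future immediately; the caller never blocks.
    # The trade-off is fire-and-forget: failures surface only if the
    # Future is checked or the worker logs them.
    executor.submit(blocking_cleanup, dataset_id)


if __name__ == '__main__':
    schedule_cleanup('ds000001')
    print('returned immediately')
    executor.shutdown(wait=True)  # let the demo worker finish
```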