Skip to content

Commit

Permalink
Improve speed of reset and delete using the _raw_delete API #729
Browse files Browse the repository at this point in the history
Signed-off-by: Thomas Druez <[email protected]>
  • Loading branch information
tdruez committed May 16, 2023
1 parent 4c542d0 commit 7952eb3
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 19 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ v33.0.0 (unreleased)
- Move the "Resources status" panel from the run modal to the project details view.
https://github.com/nexB/scancode.io/issues/370

- Improve the speed of Project ``reset`` and ``delete`` using the ``_raw_delete`` model API.
https://github.com/nexB/scancode.io/issues/729

v32.2.0 (2023-04-25)
--------------------

Expand Down
46 changes: 36 additions & 10 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,11 +528,46 @@ def archive(self, remove_input=False, remove_codebase=False, remove_output=False
self.is_archived = True
self.save()

def delete_related_objects(self):
    """
    Delete all object instances related to this project and return the counts.

    Most relations are removed using the private `_raw_delete` model API.
    This bypasses the objects collection, cascade deletions, and signals.
    It results in a much faster objects deletion, but it needs to be applied in
    the correct models order as the cascading event will not be triggered.
    Note that this approach is used in Django's `fast_deletes`, but the scanpipe
    models cannot be fast-deleted as they have cascades and relations.

    Return a dict mapping each model label to the number of deleted rows.
    """
    # Use the default `delete()` on the DiscoveredPackage model, as the
    # `codebase_resources (ManyToManyField)` records need to be collected and
    # properly deleted first.
    # Since this `ManyToManyField` has an implicit model table, we cannot
    # directly run `_raw_delete()` on its QuerySet.
    _, deleted_counter = self.discoveredpackages.all().delete()

    # Order matters: no cascade is triggered by `_raw_delete()`.
    relationships = [
        self.projecterrors,
        self.codebaserelations,
        self.discovereddependencies,
        self.codebaseresources,
        self.runs,
    ]

    for qs in relationships:
        count = qs.all()._raw_delete(qs.db)
        label = qs.model._meta.label
        # Accumulate rather than overwrite: the `delete()` call above may have
        # already reported rows for this model label, and those must not be lost.
        deleted_counter[label] = deleted_counter.get(label, 0) + count

    return deleted_counter

def delete(self, *args, **kwargs):
    """
    Remove this project: its `work_directory` on disk and its database data.

    The `_raise_if_run_in_progress()` guard is called before anything is
    removed. All positional and keyword arguments are forwarded to the parent
    `delete()`, whose return value is returned unchanged.
    """
    self._raise_if_run_in_progress()

    # Errors while removing the directory tree are ignored.
    shutil.rmtree(self.work_directory, ignore_errors=True)

    # Purge the related objects through the optimized path first, before
    # handing over to the full `delete()` process of the parent class.
    self.delete_related_objects()

    deletion_result = super().delete(*args, **kwargs)
    return deletion_result

def reset(self, keep_input=True):
Expand All @@ -542,16 +577,7 @@ def reset(self, keep_input=True):
"""
self._raise_if_run_in_progress()

relationships = [
self.projecterrors,
self.runs,
self.discoveredpackages,
self.discovereddependencies,
self.codebaseresources,
]

for relation in relationships:
relation.all().delete()
self.delete_related_objects()

work_directories = [
self.codebase_path,
Expand Down
3 changes: 3 additions & 0 deletions scanpipe/templates/scanpipe/project_detail.html
Original file line number Diff line number Diff line change
Expand Up @@ -160,5 +160,8 @@

onSubmitOverlay("#add-pipeline-modal form");
onSubmitOverlay("#add-inputs-modal form");
onSubmitOverlay("#modal-archive form");
onSubmitOverlay("#modal-reset form");
onSubmitOverlay("#modal-delete form");
</script>
{% endblock %}
34 changes: 25 additions & 9 deletions scanpipe/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,25 +142,40 @@ def test_scanpipe_project_model_archive(self):
self.assertEqual(0, len(Project.get_root_content(self.project1.codebase_path)))
self.assertEqual(1, len(Project.get_root_content(self.project1.output_path)))

def test_scanpipe_project_model_delete(self):
def test_scanpipe_project_model_delete_related_objects(self):
work_path = self.project1.work_path
self.assertTrue(work_path.exists())

uploaded_file = SimpleUploadedFile("file.ext", content=b"content")
self.project1.write_input_file(uploaded_file)
self.project1.add_pipeline("docker")
resource = CodebaseResource.objects.create(project=self.project1, path="path")
package = DiscoveredPackage.objects.create(project=self.project1)
resource.discovered_packages.add(package)

delete_log = self.project1.delete()
delete_log = self.project1.delete_related_objects()
expected = {
"scanpipe.CodebaseResource": 1,
"scanpipe.DiscoveredPackage": 1,
"scanpipe.DiscoveredPackage_codebase_resources": 1,
"scanpipe.Project": 1,
"scanpipe.DiscoveredPackage": 1,
"scanpipe.ProjectError": 0,
"scanpipe.CodebaseRelation": 0,
"scanpipe.DiscoveredDependency": 0,
"scanpipe.CodebaseResource": 1,
"scanpipe.Run": 1,
}
self.assertEqual(expected, delete_log)

def test_scanpipe_project_model_delete(self):
work_path = self.project1.work_path
self.assertTrue(work_path.exists())

uploaded_file = SimpleUploadedFile("file.ext", content=b"content")
self.project1.write_input_file(uploaded_file)
self.project1.add_pipeline("docker")
resource = CodebaseResource.objects.create(project=self.project1, path="path")
package = DiscoveredPackage.objects.create(project=self.project1)
resource.discovered_packages.add(package)

delete_log = self.project1.delete()
expected = {"scanpipe.Project": 1}
self.assertEqual(expected, delete_log[1])

self.assertFalse(Project.objects.filter(name=self.project1.name).exists())
Expand All @@ -173,8 +188,9 @@ def test_scanpipe_project_model_reset(self):
uploaded_file = SimpleUploadedFile("file.ext", content=b"content")
self.project1.write_input_file(uploaded_file)
self.project1.add_pipeline("docker")
CodebaseResource.objects.create(project=self.project1, path="path")
DiscoveredPackage.objects.create(project=self.project1)
resource = CodebaseResource.objects.create(project=self.project1, path="path")
package = DiscoveredPackage.objects.create(project=self.project1)
resource.discovered_packages.add(package)

self.project1.reset()

Expand Down

0 comments on commit 7952eb3

Please sign in to comment.