diff --git a/.github/workflows/feature-branch-deploy.yml b/.github/workflows/feature-branch-deploy.yml index 68546ba46..69b2b6a2b 100644 --- a/.github/workflows/feature-branch-deploy.yml +++ b/.github/workflows/feature-branch-deploy.yml @@ -84,7 +84,7 @@ jobs: - name: Build, tag and push docker images ECR env: REGISTRY: ${{ steps.login-ecr.outputs.registry }} - IMAGE_TAG: ${{ vars.IMAGE_TAG }} + IMAGE_TAG: ${{ secrets.IMAGE_TAG }} CLUSTER: ${{ secrets.CLUSTER_TEST }} TARGET_SERVICE: ${{ secrets.TARGET_SERVICE }} run: | diff --git a/concordia/admin/forms.py b/concordia/admin/forms.py index 9a07fb959..770c89ec2 100644 --- a/concordia/admin/forms.py +++ b/concordia/admin/forms.py @@ -37,6 +37,10 @@ class AdminProjectBulkImportForm(forms.Form): label="Spreadsheet containing the campaigns, projects, and items to import", ) + redownload = forms.BooleanField( + required=False, label="Should existing items be redownloaded?" + ) + class AdminRedownloadImagesForm(forms.Form): spreadsheet_file = forms.FileField( diff --git a/concordia/admin/views.py b/concordia/admin/views.py index d514f6c84..bd15b06a9 100644 --- a/concordia/admin/views.py +++ b/concordia/admin/views.py @@ -132,7 +132,7 @@ def project_level_export(request): form = AdminProjectBulkImportForm() context["campaigns"] = all_campaigns = [] context["projects"] = all_projects = [] - id = request.GET.get("id") + idx = request.GET.get("id") if request.method == "POST": project_list = request.POST.getlist("project_name") @@ -179,15 +179,15 @@ def project_level_export(request): ) as export_base_dir: return do_bagit_export(assets, export_base_dir, export_filename_base) - if id is not None: + if idx is not None: context["campaigns"] = [] form = AdminProjectBulkImportForm() - projects = Project.objects.filter(campaign_id=int(id)) + projects = Project.objects.filter(campaign_id=int(idx)) for project in projects: proj_dict = {} proj_dict["title"] = project.title proj_dict["id"] = project.pk - proj_dict["campaign_id"] = id + 
proj_dict["campaign_id"] = idx all_projects.append(proj_dict) else: @@ -223,11 +223,11 @@ def celery_task_review(request): } celery = Celery("concordia") celery.config_from_object("django.conf:settings", namespace="CELERY") - id = request.GET.get("id") + idx = request.GET.get("id") - if id is not None: + if idx is not None: form = AdminProjectBulkImportForm() - projects = Project.objects.filter(campaign_id=int(id)) + projects = Project.objects.filter(campaign_id=int(idx)) for project in projects: asset_successful = 0 asset_failure = 0 @@ -236,7 +236,7 @@ def celery_task_review(request): proj_dict = {} proj_dict["title"] = project.title proj_dict["id"] = project.pk - proj_dict["campaign_id"] = id + proj_dict["campaign_id"] = idx messages.info(request, f"{project.title}") importjobs = ImportJob.objects.filter(project_id=project.pk).order_by( "-created" @@ -398,7 +398,7 @@ def admin_bulk_import_review(request): ) all_urls.append(urls) - for i, val in enumerate(all_urls): + for _i, val in enumerate(all_urls): return_result = fetch_all_urls(val) for res in return_result[0]: messages.info(request, f"{res}") @@ -437,6 +437,7 @@ def admin_bulk_import_view(request): if form.is_valid(): context["import_jobs"] = import_jobs = [] + redownload = form.cleaned_data.get("redownload", False) rows = slurp_excel(request.FILES["spreadsheet_file"]) required_fields = [ @@ -545,7 +546,7 @@ def admin_bulk_import_view(request): try: import_jobs.append( import_items_into_project_from_url( - request.user, project, url + request.user, project, url, redownload ) ) diff --git a/concordia/models.py b/concordia/models.py index e38a335a5..9cd0e7d99 100644 --- a/concordia/models.py +++ b/concordia/models.py @@ -873,3 +873,30 @@ class CampaignRetirementProgress(models.Model): def __str__(self): return f"Removal progress for {self.campaign}" + + +class Card(models.Model): + image = models.ImageField(upload_to="card_images", blank=True, null=True) + title = models.CharField(max_length=80) + body_text 
= models.TextField(blank=True) + + class Meta: + abstract = True + + +class CardFamily(models.Model): + slug = models.SlugField(max_length=80, unique=True, allow_unicode=True) + default = models.BooleanField(default=False) + cards = models.ManyToManyField(Card, through="TutorialCard") + + class Meta: + abstract = True + + +class TutorialCard(models.Model): + card = models.ForeignKey(Card, on_delete=models.CASCADE) + tutorial = models.ForeignKey(CardFamily, on_delete=models.CASCADE) + order = models.IntegerField(default=0) + + class Meta: + abstract = True diff --git a/concordia/templates/admin/bulk_import.html b/concordia/templates/admin/bulk_import.html index 870e95eb2..6682ecae4 100644 --- a/concordia/templates/admin/bulk_import.html +++ b/concordia/templates/admin/bulk_import.html @@ -55,6 +55,7 @@

Import Tasks

  • Items will be added to projects but items which have already been imported into that project will be skipped. + (Unless the redownload option is checked below.) This means that you can add multiple items to a project both by having the “Import URLs” cell contain multiple URLs or by duplicating the row with new ”Import URLs” diff --git a/importer/tasks.py b/importer/tasks.py index cfb01e780..7567187c3 100644 --- a/importer/tasks.py +++ b/importer/tasks.py @@ -12,6 +12,7 @@ import requests from celery import group +from django import forms from django.core.cache import cache from django.db.transaction import atomic from django.utils.text import slugify @@ -270,7 +271,9 @@ def import_item_count_from_url(import_url): return f"Unhandled exception importing {import_url} {exc}", 0 -def import_items_into_project_from_url(requesting_user, project, import_url): +def import_items_into_project_from_url( + requesting_user, project, import_url, redownload=False +): """ Given a loc.gov URL, return the task ID for the import task """ @@ -291,26 +294,26 @@ def import_items_into_project_from_url(requesting_user, project, import_url): import_job.save() if url_type == "item": - create_item_import_task.delay(import_job.pk, import_url) + create_item_import_task.delay(import_job.pk, import_url, redownload) else: # Both collections and search results return the same format JSON # reponse so we can use the same code to process them: - import_collection_task.delay(import_job.pk) + import_collection_task.delay(import_job.pk, redownload) return import_job @app.task(bind=True) -def import_collection_task(self, import_job_pk): +def import_collection_task(self, import_job_pk, redownload=False): import_job = ImportJob.objects.get(pk=import_job_pk) - return import_collection(self, import_job) + return import_collection(self, import_job, redownload) @update_task_status -def import_collection(self, import_job): +def import_collection(self, import_job, redownload=False): item_info = 
get_collection_items(normalize_collection_url(import_job.url)) for _, item_url in item_info: - create_item_import_task.delay(import_job.pk, item_url) + create_item_import_task.delay(import_job.pk, item_url, redownload) @app.task( @@ -343,7 +346,7 @@ def redownload_image_task(self, asset_pk): retry_kwargs={"max_retries": 3}, rate_limit=2, ) -def create_item_import_task(self, import_job_pk, item_url): +def create_item_import_task(self, import_job_pk, item_url, redownload=False): """ Create an ImportItem record using the provided import job and URL by requesting the metadata from the URL @@ -367,14 +370,27 @@ def create_item_import_task(self, import_job_pk, item_url): url=item_url, item=item ) - if not item_created: - logger.warning("Not reprocessing existing item %s", item) - import_item.status = "Not reprocessing existing item %s" % item - import_item.completed = import_item.last_started = now() - import_item.task_id = self.request.id - import_item.full_clean() - import_item.save() - return + if not item_created and redownload is False: + # Item has already been imported and we're not redownloading + # all items + asset_urls, item_resource_url = get_asset_urls_from_item_resources( + item.metadata.get("resources", []) + ) + if item.asset_set.count() >= len(asset_urls): + # The item has all of its assets, so we can skip it + logger.warning("Not reprocessing existing item with all assets: %s", item) + import_item.status = ( + "Not reprocessing existing item with all assets: %s" % item + ) + import_item.completed = import_item.last_started = now() + import_item.task_id = self.request.id + import_item.full_clean() + import_item.save() + return + else: + # The item is missing one or more of its assets, so we will reprocess it + # to import the missing assets + logger.warning("Reprocessing existing item %s that is missing assets", item) import_item.item.metadata.update(item_data) @@ -423,8 +439,12 @@ def import_item(self, import_item): resource_url=item_resource_url,
storage_image="/".join([relative_asset_file_path, f"{idx}.jpg"]), ) - item_asset.full_clean() - item_assets.append(item_asset) + try: + item_asset.full_clean() + item_assets.append(item_asset) + except forms.ValidationError: + # Asset already exists + pass Asset.objects.bulk_create(item_assets)