-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #128 from scientist-softserv/i98-bulkrax
Fix Bulkrax Imports for IiifPrint PDF splitting
- Loading branch information
Showing
5 changed files
with
145 additions
and
55 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# frozen_string_literal: true | ||
|
||
# override to add PDF splitting for file sets | ||
module IiifPrint
  module Actors
    # Decorates a file set actor so that PDFs attached to a work configured
    # for splitting are handed to the work's PDF splitter job.
    #
    # Both overrides call +super+ first and return whatever the decorated
    # method returned, so callers keep seeing the undecorated contract.
    module FileSetActorDecorator
      # Runs the decorated #create_content, then either queues the split job
      # right away (URL ingest) or remembers the uploaded PDF paths so that
      # #attach_to_work can queue it once the parent work is known.
      #
      # @param file [#path, #id] the file being ingested. Only uploads that
      #   respond to +#id+ can be resolved to a local path; anything else
      #   (e.g. a plain File/IO) yields no paths instead of raising.
      # @param relation [Symbol] forwarded untouched to the decorated method
      # @param from_url [Boolean] true when the file was ingested from a URL
      # @return [Object] the decorated method's return value
      def create_content(file, relation = :original_file, from_url: false)
        # Spawns asynchronous IngestJob unless ingesting from URL
        result = super
        # NOTE(review): Hyrax documents `false` as the failure result of
        # create_content; nothing was ingested, so there is nothing to split.
        return result if result == false

        if from_url
          # we have everything we need... queue the job
          parent = parent_for(file_set: file_set)

          if service.iiif_print_split?(work: parent) && service.pdfs?(paths: [file_set.import_url])
            service.queue_job(
              work: parent,
              file_locations: [file.path],
              user: @user,
              admin_set_id: parent.admin_set_id
            )
          end
        else
          # we don't have the parent yet... save the paths for later use
          upload_ids = file.respond_to?(:id) ? [file.id.to_s] : []
          @pdf_paths = service.pdf_paths(files: upload_ids)
        end

        result
      end

      # Override to add PDF splitting: after the decorated #attach_to_work
      # has run, queue the split job for any PDF paths remembered by
      # #create_content, provided the work is configured for splitting.
      #
      # @param work [Object] the work the file set is being attached to
      # @param file_set_params [Hash] forwarded untouched to the decorated method
      # @return [Object] the decorated method's return value
      def attach_to_work(work, file_set_params = {})
        # Locks to ensure that only one process is operating on the list at a time.
        result = super

        if @pdf_paths.present? && service.iiif_print_split?(work: work)
          service.queue_job(
            work: work,
            file_locations: @pdf_paths,
            user: @user,
            admin_set_id: work.admin_set_id
          )
        end

        result
      end

      # @return [Class] the service that encapsulates the PDF splitting helpers
      def service
        IiifPrint::SplitPdfs::ChildWorkCreationFromPdfService
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
75 changes: 75 additions & 0 deletions
75
lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
# frozen_string_literal: true | ||
|
||
# Encapsulates methods used for pdf splitting into child works | ||
module IiifPrint
  module SplitPdfs
    # Stateless helpers used to decide whether a work's PDFs should be split
    # into child works and to queue the job that does so.
    class ChildWorkCreationFromPdfService
      # Extension (lower case) that marks a path as a PDF.
      PDF_EXTENSION = '.pdf'

      # Load an array of paths to pdf files
      # @param files [Array<String>] Hyrax::UploadedFile ids; blank entries are ignored
      # @return [Array<String>] local file paths of the uploads that are PDFs
      def self.pdf_paths(files:)
        upload_ids = filter_file_ids(files)
        return [] if upload_ids.empty?

        uploads = Hyrax::UploadedFile.find(upload_ids)
        pdfs_only_for(uploads.map { |upload| upload_path(upload) })
      end

      # Is child work splitting defined for model?
      # @param work [GenericWork, etc] A valid type of hyrax work
      # @return [Boolean]
      def self.iiif_print_split?(work:)
        # defined only if work has include IiifPrint.model_configuration with pdf_split_child_model
        work.try(:iiif_print_config)&.pdf_split_child_model ? true : false
      end

      # Are there any PDF files?
      # @param paths [Array<String>] candidate paths (or URLs)
      # @return [Boolean] true when at least one path ends in a PDF extension
      def self.pdfs?(paths:)
        pdfs_only_for(paths).any?
      end

      # Submit the job to split PDF into child works.
      #
      # The job also receives the result of .count_existing_pdfs as a fifth
      # positional argument (currently always 0).
      #
      # @param work [GenericWork, etc] A valid type of hyrax work
      # @param file_locations [Array<String>] paths to PDF attachments
      # @param user [User] user passed through to the job
      # @param admin_set_id [Object] admin set id passed through to the job
      def self.queue_job(work:, file_locations:, user:, admin_set_id:)
        work.iiif_print_config.pdf_splitter_job.perform_later(
          work,
          file_locations,
          user,
          admin_set_id,
          count_existing_pdfs(work)
        )
      end

      # @param input [Object, Array, nil] one id or a list of ids
      # @return [Array] the ids with blank entries removed
      def self.filter_file_ids(input)
        Array.wrap(input).select(&:present?)
      end

      # Given Hyrax::Upload object, return path to file on local filesystem
      def self.upload_path(upload)
        # so many layers to this onion:
        upload.file.file.file
      end

      # TODO: implement a method to count existing PDFs on a work to support
      # adding more PDFs to an existing work.
      def self.count_existing_pdfs(_work)
        0
      end

      # Keep only the paths whose name ends in ".pdf", compared without
      # regard to case (".pdf", ".PDF", ".Pdf", ...). A nil list or nil
      # entries are tolerated and simply produce no match.
      #
      # TODO: Consider other methods to identify a PDF file.
      # This sub-selection may need to be moved to use mimetype if there
      # is a need to support paths not ending in .pdf (i.e. remote_urls)
      #
      # @param paths [Array<String>, nil]
      # @return [Array<String>] the matching entries, unchanged and in order
      def self.pdfs_only_for(paths)
        Array(paths).select { |path| path.to_s.downcase.end_with?(PDF_EXTENSION) }
      end
    end
  end
end