Skip to content

Commit

Permalink
Merge pull request #24 from scientist-softserv/upload-actor-i7
Browse files Browse the repository at this point in the history
Refactor IIIF Print Upload Actor
  • Loading branch information
laritakr authored Dec 13, 2022
2 parents e5a9d31 + 3ff0aad commit 20425eb
Show file tree
Hide file tree
Showing 6 changed files with 221 additions and 90 deletions.
84 changes: 43 additions & 41 deletions app/actors/iiif_print/actors/iiif_print_upload_actor.rb
Original file line number Diff line number Diff line change
@@ -1,60 +1,27 @@
module IiifPrint
module Actors
class IiifPrintUploadActor < Hyrax::Actors::AbstractActor
# An actor which locates all uploaded PDF paths and
# spins off IiifPrint::CreatePagesJob to split them.
def create(env)
  # Entry point for the :create pass of the actor stack.
  #
  # @param env [Hyrax::Actors::Environment] carries curation_concern and attributes
  # @return [Boolean] true when the actors lower on the stack succeeded;
  #   otherwise the falsy result of next_actor.create

  # Ensure that work has title, set from form data if present
  # TODO: test what happens when ensure_title is removed.
  ensure_title(env)
  # We might have a PDF to split; make a list of paths to PDF uploads
  # before next_actor removes them from env.attributes, with state kept
  # in an instance variable until late in the "heading back up" phase of
  # the actor stack, where the correct depositor value is already set on
  # the work (only at that point should we queue the job to create
  # child pages).
  @pdf_paths = []
  # Duck-typed check only: any model that responds to split_pdf takes part,
  # so the paths are collected exactly once per request.
  hold_upload_paths(env) if responds_to_split?(env.curation_concern)
  # pass to next actor, then handle uploads after other actors
  # that are lower on the stack
  next_actor.create(env) && after_other_actors(env)
end

def after_other_actors(env)
  # Runs once the actors lower on the stack have finished.
  # Uses the same duck-typed check as create/update (responds to split_pdf)
  # instead of comparing against one concrete model class, so subclasses
  # and other models are treated consistently.
  handle_issue_upload(env) if responds_to_split?(env.curation_concern)
  # needs to return true to not break actor stack traversal
  true
end

# Work must have a title to save, and this actor's .create/.update
# methods run prior to the setting of form data. This ensures
# appropriate title is set on model.
def ensure_title(env)
  # Copy the submitted title onto the model; leave the model untouched
  # when the form carried no 'title' entry.
  submitted = env.attributes['title']
  env.curation_concern.title = submitted unless submitted.nil?
end

def update(env)
  # Entry point for the :update pass of the actor stack.
  #
  # @param env [Hyrax::Actors::Environment] carries curation_concern and attributes
  # @return [Boolean] true when the actors lower on the stack succeeded;
  #   otherwise the falsy result of next_actor.update

  # Ensure that work has title, set from form data if present
  # TODO: test what happens when ensure_title is removed.
  ensure_title(env)
  @pdf_paths = []
  # Duck-typed check only: any model that responds to split_pdf takes part,
  # so the paths are collected exactly once per request.
  hold_upload_paths(env) if responds_to_split?(env.curation_concern)
  # pass to next actor, then handle uploads after other actors
  # that are lower on the stack
  next_actor.update(env) && after_other_actors(env)
end

# Returns whatever AdminSet.find_or_create_default_admin_set_id returns.
# NOTE(review): presumably the id of the default admin set, created on
# demand -- confirm against the AdminSet implementation in use.
def default_admin_set
AdminSet.find_or_create_default_admin_set_id
end

def queue_job(work, paths, user, admin_set_id)
  # Enqueue the background job that receives the work, the held PDF paths,
  # the user and the admin set id.
  # The job class is IiifPrint::CreatePagesJob (renamed from
  # CreateIssuePagesJob); the spec expects perform_later on that class.
  IiifPrint::CreatePagesJob.perform_later(
    work,
    paths,
    user,
    admin_set_id
  )
end
private

# fill the array of pdf files' upload paths
def hold_upload_paths(env)
return unless env.attributes.keys.include? 'uploaded_files'
upload_ids = filter_file_ids(env.attributes['uploaded_files'])
Expand All @@ -64,6 +31,17 @@ def hold_upload_paths(env)
@pdf_paths = paths.select { |path| path.end_with?('.pdf') }
end

def responds_to_split?(curation_concern)
  # True when the model exposes split_pdf (provided by
  # IiifPrint::IiifPrintBehavior), false otherwise. respond_to? already
  # answers with a boolean, so no explicit true/false branching is needed.
  curation_concern.respond_to?(:split_pdf)
end

def after_other_actors(env)
  # Called on the way back up the stack, after lower actors have run.
  concern = env.curation_concern
  handle_issue_upload(env) if responds_to_split?(concern)

  # Always report success so the actor stack traversal is not broken.
  true
end

def handle_issue_upload(env)
return if @pdf_paths.empty?
work = env.curation_concern
Expand All @@ -74,6 +52,30 @@ def handle_issue_upload(env)
queue_job(work, @pdf_paths, user, env.attributes[:admin_set_id])
end

def queue_job(work, paths, user, admin_set_id)
  # Hand the work, the held PDF paths, the user and the admin set id
  # to the background job.
  job_arguments = [work, paths, user, admin_set_id]
  IiifPrint::CreatePagesJob.perform_later(*job_arguments)
end

# TODO: test what happens when ensure_title is removed... the
# work is saved after all other actors, so this may be a non-issue?
# Work must have a title to save, and this actor's .create/.update
# methods run prior to the setting of form data. This ensures
# appropriate title is set on model.
def ensure_title(env)
  # Only a nil 'title' entry is skipped; any other value is copied to the model.
  env.attributes['title']&.tap do |form_title|
    env.curation_concern.title = form_title
  end
end

# Returns whatever AdminSet.find_or_create_default_admin_set_id returns.
# NOTE(review): presumably the id of the default admin set, created on
# demand -- confirm against the AdminSet implementation in use.
def default_admin_set
AdminSet.find_or_create_default_admin_set_id
end

# Given Hyrax::Upload object, return path to file on local filesystem
def upload_path(upload)
# so many layers to this onion:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module IiifPrint
# Create child page works for issue
class CreateIssuePagesJob < IiifPrint::ApplicationJob
# Break a pdf into individual pages
class CreatePagesJob < IiifPrint::ApplicationJob
def perform(work, _pdf_paths, user, admin_set_id)
# we will need depositor set on work, if it is nil
work.depositor ||= user
Expand Down
8 changes: 8 additions & 0 deletions app/models/concerns/iiif_print/iiif_print_behavior.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
module IiifPrint
# Mixin that marks a model as taking part in IIIF Print PDF handling.
# IiifPrint::Actors::IiifPrintUploadActor checks
# respond_to?(:split_pdf) on the curation concern before holding upload
# paths and queueing the page-creation job.
module IiifPrintBehavior
# adds IIIF Print behavior to an object
# Always returns true; in the upload actor only the presence of this
# method is checked, not its return value.
def split_pdf
true
end
end
end
188 changes: 141 additions & 47 deletions spec/actors/iiif_print/actors/iiif_print_upload_actor_spec.rb
Original file line number Diff line number Diff line change
@@ -1,69 +1,163 @@
require 'faraday'
require 'spec_helper'
require 'misc_shared'
# TODO: revisit commented out spec code which belongs in a feature or CreatePagesJob spec
# require 'faraday'
# require 'misc_shared'

RSpec.describe IiifPrint::Actors::IiifPrintUploadActor, :perform_enqueued do
include_context 'shared setup'

let(:issue) { build(:newspaper_issue) }
RSpec.describe IiifPrint::Actors::IiifPrintUploadActor do # , :perform_enqueued do
let(:work) { build(:newspaper_issue) }
let(:ability) { build(:ability) }
let(:uploaded_pdf_file) { create(:uploaded_pdf_file) }
let(:uploaded_file_ids) { [uploaded_pdf_file.id] }
let(:uploaded_txt_file) { create(:uploaded_txt_file) }
let(:uploaded_file_ids) { [uploaded_pdf_file.id, uploaded_txt_file.id] }
let(:attributes) { { title: ['foo'], uploaded_files: uploaded_file_ids } }
let(:terminator) { Hyrax::Actors::Terminator.new }
let(:no_pdf_attributes) { { title: ['foo'], uploaded_files: [] } }

# environment with uploads:
let(:env) { Hyrax::Actors::Environment.new(issue, ability, attributes) }
let(:with_pdf_env) { Hyrax::Actors::Environment.new(work, ability, attributes) }
# environment with NO uploads:
let(:edit_env) { Hyrax::Actors::Environment.new(work, ability, {}) }
# environment with NO uploads:
let(:edit_env) { Hyrax::Actors::Environment.new(issue, ability, {}) }
let(:no_pdf_env) { Hyrax::Actors::Environment.new(work, ability, no_pdf_attributes) }

let(:terminator) { Hyrax::Actors::Terminator.new }
let(:middleware) do
  # Build a stack holding only the actor under test, ending in the terminator.
  actor_stack = ActionDispatch::MiddlewareStack.new
  actor_stack.use described_class
  actor_stack.build(terminator)
end

let(:uploaded_issue) do
middleware.public_send(:create, env)
# return work, reloaded, because env.curation_concern will be stale after
# running actor.
NewspaperIssue.find(env.curation_concern.id)
describe 'included in the actor stack' do
  # The actor factory is the stack Hyrax runs for curation concerns; the
  # upload actor must be registered in it for create/update to reach it.
  let(:stack) { Hyrax::CurationConcern.actor_factory }

  # Description now names the class that is actually asserted on.
  it 'includes IiifPrint::Actors::IiifPrintUploadActor' do
    expect(stack.middlewares).to include(IiifPrint::Actors::IiifPrintUploadActor)
  end
end

let(:edited_issue) do
middleware.public_send(:update, edit_env)
NewspaperIssue.find(edit_env.curation_concern.id)
context 'when work model includes IiifPrintBehavior' do
  # Simulate a model that mixes in IiifPrintBehavior: the work answers
  # respond_to?(:split_pdf) with true, every other respond_to? query is
  # passed through to the real implementation. Shared by :create and :update.
  before do
    allow(work).to receive(:respond_to?).and_call_original
    allow(work).to receive(:respond_to?).with(:split_pdf).and_return true
  end

  describe ':create' do
    let(:mode) { :create }

    context 'when work has a pdf file' do
      let(:mode_env) { with_pdf_env }

      it 'queues a IiifPrint::CreatePagesJob' do
        # NOTE(review): the absolute upload path is environment-specific and
        # the user string looks like an e-mail obfuscation artifact --
        # confirm both expected values against the real fixtures.
        expect(IiifPrint::CreatePagesJob).to receive(:perform_later).with(
          work,
          ["/app/samvera/hyrax-webapp/.internal_test_app/tmp/uploads/hyrax/uploaded_file/file/1/minimal-2-page.pdf"],
          "[email protected]",
          "admin_set/default"
        )
        expect(middleware.public_send(mode, mode_env)).to be true
      end
    end

    context 'when work has no pdf file' do
      let(:mode_env) { no_pdf_env }

      it 'does not queue IiifPrint::CreatePagesJob' do
        expect(IiifPrint::CreatePagesJob).not_to receive(:perform_later)
        expect(middleware.public_send(mode, mode_env)).to be true
      end
    end
  end

  describe ':update' do
    let(:mode) { :update }

    context 'works is updated with no additional uploads' do
      let(:mode_env) { edit_env }

      # The example asserts that no job is queued; the description says so too.
      it 'does not queue IiifPrint::CreatePagesJob' do
        expect(IiifPrint::CreatePagesJob).not_to receive(:perform_later)
        expect(middleware.public_send(mode, mode_env)).to be true
      end
    end
  end
end

describe "NewspaperIssue upload of PDF" do
do_now_jobs = [
IiifPrint::CreateIssuePagesJob,
IngestLocalFileJob,
IngestJob
]
context 'when work model does not IiifPrintBehavior' do
describe ':create' do
  let(:mode) { :create }

  # The work does NOT answer to split_pdf here, so the actor must never
  # queue the page-creation job, whether or not a PDF was uploaded.
  before do
    allow(work).to receive(:respond_to?).and_call_original
    allow(work).to receive(:respond_to?).with(:split_pdf).and_return false
  end

  context 'when work has a pdf file' do
    let(:mode_env) { with_pdf_env }

    # The example asserts that no job is queued; the description says so too.
    it 'does not queue IiifPrint::CreatePagesJob' do
      expect(IiifPrint::CreatePagesJob).not_to receive(:perform_later)
      expect(middleware.public_send(mode, mode_env)).to be true
    end
  end

  context 'when work has no pdf file' do
    let(:mode_env) { no_pdf_env }

    it 'does not queue IiifPrint::CreatePagesJob' do
      expect(IiifPrint::CreatePagesJob).not_to receive(:perform_later)
      expect(middleware.public_send(mode, mode_env)).to be true
    end
  end
end

# we over-burden one example, because sadly RSpec does not do well with
# shared state across examples (without use of `before(:all)` which is
# mutually exclusive with `let` in practice, and ruffles rubocop's
# overzealous sense of moral duty, speaking of which:
it "creates child pages for issue", perform_enqueued: do_now_jobs do
pages = uploaded_issue.ordered_pages
expect(pages.size).to eq 2
page = pages[0]
# Page needs correct admin set:
expect(page.admin_set_id).to eq 'admin_set/default'
file_sets = page.members.select { |v| v.class == FileSet }
expect(file_sets.size).to eq 1
files = file_sets[0].files
url = files[0].uri.to_s
# fetch the thing from Fedora Commons:
response = Faraday.get(url)
stored_size = response.body.length
expect(stored_size).to be > 0
# expect that subsequent edits of same issue (run though update
# method of actor stack) do not duplicate pages (verify by count):
expect(edited_issue.id).to eq uploaded_issue.id
pages = edited_issue.ordered_pages
expect(pages.size).to eq 2 # still the same page count
describe ':update' do
  let(:mode) { :update }

  # The work does NOT answer to split_pdf here, so an update must never
  # queue the page-creation job.
  before do
    allow(work).to receive(:respond_to?).and_call_original
    allow(work).to receive(:respond_to?).with(:split_pdf).and_return false
  end

  context 'works is updated with no additional uploads' do
    let(:mode_env) { edit_env }

    # The example asserts that no job is queued; the description says so too.
    it 'does not queue IiifPrint::CreatePagesJob' do
      expect(IiifPrint::CreatePagesJob).not_to receive(:perform_later)
      expect(middleware.public_send(mode, mode_env)).to be true
    end
  end
end
end

# let(:uploaded_work) do
# middleware.public_send(:create, env)
# # return work, reloaded, because env.curation_concern will be stale after
# # running actor.
# NewspaperIssue.find(env.curation_concern.id)
# end
# let(:edited_work) do
# middleware.public_send(:update, edit_env)
# NewspaperIssue.find(edit_env.curation_concern.id)
# end

# describe "NewspaperIssue upload of PDF" do
# do_now_jobs = [
# IiifPrint::CreatePagesJob,
# IngestLocalFileJob,
# IngestJob
# ]

# # we over-burden one example, because sadly RSpec does not do well with
# # shared state across examples (without use of `before(:all)` which is
# # mutually exclusive with `let` in practice, and ruffles rubocop's
# # overzealous sense of moral duty, speaking of which:
# xit "creates child pages for issue", perform_enqueued: do_now_jobs do
# pages = uploaded_issue.ordered_pages
# expect(pages.size).to eq 2
# page = pages[0]
# # Page needs correct admin set:
# expect(page.admin_set_id).to eq 'admin_set/default'
# file_sets = page.members.select { |v| v.class == FileSet }
# expect(file_sets.size).to eq 1
# files = file_sets[0].files
# url = files[0].uri.to_s
# # fetch the thing from Fedora Commons:
# response = Faraday.get(url)
# stored_size = response.body.length
# expect(stored_size).to be > 0
# # expect that subsequent edits of same issue (run though update
# # method of actor stack) do not duplicate pages (verify by count):
# expect(edited_issue.id).to eq uploaded_issue.id
# pages = edited_issue.ordered_pages
# expect(pages.size).to eq 2 # still the same page count
# end
# end
end
9 changes: 9 additions & 0 deletions spec/factories/uploaded_txt_file.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
FactoryBot.define do
  # Builds a Hyrax::UploadedFile backed by the plain-text fixture shipped
  # with the gem, owned by a freshly created user.
  factory :uploaded_txt_file, class: Hyrax::UploadedFile do
    initialize_with do
      fixture = File.join(
        IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files', 'ndnp-sample1-txt.txt'
      )
      new(file: File.open(fixture), user: create(:user))
    end
  end
end
18 changes: 18 additions & 0 deletions spec/models/concerns/iiif_print/iiif_print_behavior_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
require 'spec_helper'
RSpec.describe IiifPrint::IiifPrintBehavior do
  describe "including_this_module" do
    subject { PrintWork.new }

    # Throwaway model that mixes in the behavior under test.
    before do
      class PrintWork < ActiveFedora::Base
        include IiifPrint::IiifPrintBehavior
      end
    end

    # Remove the throwaway constant so it does not leak into other specs
    # (and is not silently reopened by the next example).
    after do
      Object.send(:remove_const, :PrintWork)
    end

    describe 'split_pdf' do
      it 'is true' do
        expect(subject.split_pdf).to be true
      end
    end
  end
end

0 comments on commit 20425eb

Please sign in to comment.