diff --git a/app/jobs/characterize_job.rb b/app/jobs/characterize_job.rb
index 6ea95b4d02..0c15f8ff39 100644
--- a/app/jobs/characterize_job.rb
+++ b/app/jobs/characterize_job.rb
@@ -1,33 +1,9 @@
+# Characterizes a FileSet by delegating to whichever characterizer
+# Hyrax::Characterizer.for selects for the given file_set.
 class CharacterizeJob < Hyrax::ApplicationJob
   queue_as Hyrax.config.ingest_queue_name
 
-  # Characterizes the file at 'filepath' if available, otherwise, pulls a copy from the repository
-  # and runs characterization on that file.
-  # @param [FileSet] file_set
-  # @param [String] file_id identifier for a Hydra::PCDM::File
-  # @param [String, NilClass] filepath the cached file within the Hyrax.config.working_path
-  def perform(file_set, file_id, filepath = nil)
-    raise "#{file_set.class.characterization_proxy} was not found for FileSet #{file_set.id}" unless file_set.characterization_proxy?
-    filepath = Hyrax::WorkingDirectory.find_or_retrieve(file_id, file_set.id) unless filepath && File.exist?(filepath)
-    characterize(file_set, file_id, filepath)
-    CreateDerivativesJob.perform_later(file_set, file_id, filepath)
+  def perform(file_set)
+    Hyrax::Characterizer.for(source: file_set).characterize
   end
-
-  private
-
-  def characterize(file_set, _file_id, filepath)
-    Hydra::Works::CharacterizationService.run(file_set.characterization_proxy, filepath)
-    Rails.logger.debug "Ran characterization on #{file_set.characterization_proxy.id} (#{file_set.characterization_proxy.mime_type})"
-    file_set.characterization_proxy.alpha_channels = channels(filepath) if file_set.image? && Hyrax.config.iiif_image_server?
-    file_set.characterization_proxy.save!
-    file_set.update_index
-  end
-
-  def channels(filepath)
-    ch = MiniMagick::Tool::Identify.new do |cmd|
-      cmd.format '%[channels]'
-      cmd << filepath
-    end
-    [ch]
-  end
 end
diff --git a/app/models/hyrax/file_set.rb b/app/models/hyrax/file_set.rb
index 92289b1cbb..22df222706 100644
--- a/app/models/hyrax/file_set.rb
+++ b/app/models/hyrax/file_set.rb
@@ -7,6 +7,8 @@ module Hyrax
   # @see https://wiki.duraspace.org/display/samvera/Hydra%3A%3AWorks+Shared+Modeling
   class FileSet < Hyrax::Resource
     include Hyrax::Schema(:core_metadata)
+    include Hyrax::FileSet::Characterization
+    include Hydra::Works::MimeTypes
 
     attribute :file_ids, Valkyrie::Types::Array.of(Valkyrie::Types::ID) # id for FileMetadata resources
     attribute :original_file_id, Valkyrie::Types::ID # id for FileMetadata resource
diff --git a/app/services/hyrax/characterizer.rb b/app/services/hyrax/characterizer.rb
new file mode 100644
index 0000000000..5cd59ba454
--- /dev/null
+++ b/app/services/hyrax/characterizer.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Hyrax
+  ##
+  # Determines which characterizer to run based on the file_set type
+  # allowing implementation of Valkyrie file_sets
+  class Characterizer
+    ##
+    # @param source [Hyrax::FileSetBehavior, Hyrax::FileSet] the object to run a characterizer on
+    #
+    # @return [#characterize]
+    #
+    # @raise [ArgumentError] if no characterizer exists for the type of source
+    def self.for(source:)
+      case source
+      when Hyrax::FileSetBehavior # ActiveFedora
+        FileSetCharacterizer.new(source: source)
+      when Hyrax::FileSet # Valkyrie
+        ResourceCharacterizer.new(source: source)
+      else
+        raise ArgumentError, "No characterizer available for #{source.class}"
+      end
+    end
+  end
+end
diff --git a/app/services/hyrax/file_set_characterizer.rb b/app/services/hyrax/file_set_characterizer.rb
new file mode 100644
index 0000000000..faf65d0bac
--- /dev/null
+++ b/app/services/hyrax/file_set_characterizer.rb
@@ -0,0 +1,57 @@
+# frozen_string_literal: true
+
+module Hyrax
+  ##
+  # Characterizes an ActiveFedora based FileSet
+  class FileSetCharacterizer
+    ##
+    # @!attribute [rw] source
+    #   @return [Hyrax::FileSetBehavior] the FileSet to characterize
+    attr_accessor :source
+
+    ##
+    # @param source the object to characterize
+    def initialize(source:)
+      @source = source
+    end
+
+    ##
+    # Runs characterization on the source's characterization proxy, saves the
+    # proxy, reindexes the source, and enqueues a CreateDerivativesJob.
+    #
+    # @return [void]
+    #
+    # @raise [RuntimeError] if FileSet is missing the characterization_proxy
+    def characterize
+      # Resolve the proxy and the working copy once; the proxy lookup comes first
+      # so a missing proxy raises before any file is retrieved.
+      proxy = characterization_proxy
+      path = filepath
+      Hydra::Works::CharacterizationService.run(proxy, path)
+      Rails.logger.debug "Ran characterization on #{proxy.id} (#{proxy.mime_type})"
+      proxy.alpha_channels = channels(path) if source.image? && Hyrax.config.iiif_image_server?
+      proxy.save!
+      source.update_index
+      CreateDerivativesJob.perform_later(source, source.original_file.id, path)
+    end
+
+    private
+
+    def characterization_proxy
+      raise "#{source.class.characterization_proxy} was not found for FileSet #{source.id}" unless source.characterization_proxy?
+      source.characterization_proxy
+    end
+
+    def filepath
+      Hyrax::WorkingDirectory.find_or_retrieve(source.original_file.id, source.id)
+    end
+
+    def channels(path)
+      ch = MiniMagick::Tool::Identify.new do |cmd|
+        cmd.format '%[channels]'
+        cmd << path
+      end
+      [ch]
+    end
+  end
+end
diff --git a/app/services/hyrax/resource_characterizer.rb b/app/services/hyrax/resource_characterizer.rb
new file mode 100644
index 0000000000..cf5abb44ec
--- /dev/null
+++ b/app/services/hyrax/resource_characterizer.rb
@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+
+module Hyrax
+  ##
+  # Characterizes a Valkyrie based FileSet
+  class ResourceCharacterizer
+    ##
+    # @!attribute [rw] source
+    #   @return [Hyrax::FileSet] the Valkyrie FileSet resource to characterize
+    attr_accessor :source
+
+    ##
+    # @param source the object to characterize
+    def initialize(source:)
+      @source = source
+    end
+
+    ##
+    # Runs characterization on the source's characterization proxy, persists the
+    # proxy and the source, and enqueues a CreateDerivativesJob.
+    #
+    # @return [void]
+    #
+    # @raise [RuntimeError] if FileSet is missing the characterization_proxy
+    def characterize
+      # Resolve the proxy and the working copy once so the object that is mutated
+      # is the object that is saved; the proxy lookup comes first so it raises early.
+      proxy = characterization_proxy
+      path = filepath
+      Hydra::Works::CharacterizationService.run(proxy, path)
+      Rails.logger.debug "Ran characterization on #{proxy.id} (#{proxy.mime_type})"
+      proxy.alpha_channels = channels(path) if source.image? && Hyrax.config.iiif_image_server?
+      Hyrax.persister.save(resource: proxy)
+      Hyrax.persister.save(resource: source)
+      CreateDerivativesJob.perform_later(source, source.original_file.id, path)
+    end
+
+    private
+
+    def characterization_proxy
+      raise "#{source.class.characterization_proxy} was not found for FileSet #{source.id}" unless source.characterization_proxy?
+      source.characterization_proxy
+    end
+
+    def filepath
+      # The current version of Valkyrie id returns a Valkyrie::ID and requires a .id to actually retrieve the id.
+      # This should be updated to source.id after a Valkyrie update
+      Hyrax::WorkingDirectory.find_or_retrieve(source.original_file.id, source.id.id)
+    end
+
+    def channels(path)
+      ch = MiniMagick::Tool::Identify.new do |cmd|
+        cmd.format '%[channels]'
+        cmd << path
+      end
+      [ch]
+    end
+  end
+end
diff --git a/spec/jobs/characterize_job_spec.rb b/spec/jobs/characterize_job_spec.rb
index f67aedb392..845a0c6b5d 100644
--- a/spec/jobs/characterize_job_spec.rb
+++ b/spec/jobs/characterize_job_spec.rb
@@ -7,7 +7,6 @@
       allow(fs).to receive(:update_index)
     end
   end
-  # let(:io) { JobIoWrapper.new(file_set_id: file_set.id, user: create(:user), path: filename) }
   let(:file) do
     Hydra::PCDM::File.new.tap do |f|
       f.content = 'foo'
@@ -17,36 +16,17 @@
     end
   end
 
-  before do
-    allow(FileSet).to receive(:find).with(file_set_id).and_return(file_set)
-    allow(Hydra::Works::CharacterizationService).to receive(:run).with(file, filename)
-    allow(CreateDerivativesJob).to receive(:perform_later).with(file_set, file.id, filename)
-  end
-
-  context 'with valid filepath param' do
-    let(:filename) { File.join(fixture_path, 'world.png') }
+  context "when the file set's work is in a collection" do
+    let(:work) { build(:generic_work) }
+    let(:collection) { build(:collection_lw) }
 
-    it 'skips Hyrax::WorkingDirectory' do
-      expect(Hyrax::WorkingDirectory).not_to receive(:find_or_retrieve)
-      expect(Hydra::Works::CharacterizationService).to receive(:run).with(file, filename)
-      described_class.perform_now(file_set, file.id, filename)
+    before do
+      allow(file_set).to receive(:parent).and_return(work)
+      allow(work).to receive(:in_collections).and_return([collection])
     end
-  end
-
-  context 'when the characterization proxy content is present' do
-    it 'runs Hydra::Works::CharacterizationService and creates a CreateDerivativesJob' do
-      expect(Hydra::Works::CharacterizationService).to receive(:run).with(file, filename)
-      expect(file).to receive(:save!)
-      expect(file_set).to receive(:update_index)
-      expect(CreateDerivativesJob).to receive(:perform_later).with(file_set, file.id, filename)
-      described_class.perform_now(file_set, file.id)
-    end
-  end
-
-  context 'when the characterization proxy content is absent' do
-    before { allow(file_set).to receive(:characterization_proxy?).and_return(false) }
-    it 'raises an error' do
-      expect { described_class.perform_now(file_set, file.id) }.to raise_error(StandardError, /original_file was not found/)
+    it "reindexes the collection" do
+      expect(collection).to receive(:update_index)
+      described_class.perform_now(file_set)
     end
   end
 end
diff --git a/spec/services/hyrax/file_set_characterizer_spec.rb b/spec/services/hyrax/file_set_characterizer_spec.rb
new file mode 100644
index 0000000000..2f00882803
--- /dev/null
+++ b/spec/services/hyrax/file_set_characterizer_spec.rb
@@ -0,0 +1,43 @@
+RSpec.describe Hyrax::FileSetCharacterizer do
+  subject(:characterizer) { described_class.new(source: file_set) }
+
+  let(:file_set_id) { 'abc12345' }
+  let(:filename) { Rails.root.join('tmp', 'uploads', 'ab', 'c1', '23', '45', 'abc12345', 'picture.png').to_s }
+  let(:file_set) do
+    FileSet.new(id: file_set_id).tap do |fs|
+      allow(fs).to receive(:original_file).and_return(file)
+      allow(fs).to receive(:update_index)
+    end
+  end
+  let(:file) do
+    Hydra::PCDM::File.new.tap do |f|
+      f.content = 'foo'
+      f.original_name = 'picture.png'
+      f.save!
+      allow(f).to receive(:save!)
+    end
+  end
+
+  before do
+    allow(FileSet).to receive(:find).with(file_set_id).and_return(file_set)
+    allow(Hydra::Works::CharacterizationService).to receive(:run).with(file, filename)
+    allow(CreateDerivativesJob).to receive(:perform_later).with(file_set, file.id, filename)
+  end
+
+  context 'when the characterization proxy content is present' do
+    it 'runs Hydra::Works::CharacterizationService and creates a CreateDerivativesJob' do
+      expect(Hydra::Works::CharacterizationService).to receive(:run).with(file, filename)
+      expect(file).to receive(:save!)
+      expect(file_set).to receive(:update_index)
+      expect(CreateDerivativesJob).to receive(:perform_later).with(file_set, file.id, filename)
+      characterizer.characterize
+    end
+  end
+
+  context 'when the characterization proxy content is absent' do
+    before { allow(file_set).to receive(:characterization_proxy?).and_return(false) }
+    it 'raises an error' do
+      expect { characterizer.characterize }.to raise_error(StandardError, /original_file was not found/)
+    end
+  end
+end
diff --git a/spec/services/hyrax/resource_characterizer_spec.rb b/spec/services/hyrax/resource_characterizer_spec.rb
new file mode 100644
index 0000000000..21d88a1bbb
--- /dev/null
+++ b/spec/services/hyrax/resource_characterizer_spec.rb
@@ -0,0 +1,31 @@
+RSpec.describe Hyrax::ResourceCharacterizer do
+  subject(:characterizer) { described_class.new(source: resource) }
+
+  before do
+    allow(Hyrax::FileSet).to receive(:find).with(resource.id).and_return(resource)
+    allow(Hydra::Works::CharacterizationService).to receive(:run)
+    allow(Hyrax.persister).to receive(:save)
+    allow(CreateDerivativesJob).to receive(:perform_later)
+  end
+
+  context 'with a complete Valkyrie FileSet' do
+    let(:resource) { FactoryBot.create(:file_set, content: File.open(fixture_path + '/world.png')).valkyrie_resource }
+
+    it 'runs Hydra::Works::CharacterizationService and creates a CreateDerivativesJob' do
+      expect(Hydra::Works::CharacterizationService).to receive(:run).with(resource.original_file, anything)
+      expect(CreateDerivativesJob).to receive(:perform_later).with(resource, resource.original_file.id, anything)
+      expect(Hyrax.persister).to receive(:save).twice
+      characterizer.characterize
+    end
+  end
+
+  context 'without a complete Valkyrie FileSet' do
+    let(:resource) { FactoryBot.create(:file_set).valkyrie_resource }
+
+    before { allow(resource).to receive(:characterization_proxy?).and_return(false) }
+
+    it 'raises an error' do
+      expect { characterizer.characterize }.to raise_error(StandardError, /original_file was not found/)
+    end
+  end
+end