diff --git a/app/indexers/concerns/iiif_print/file_set_indexer.rb b/app/indexers/concerns/iiif_print/file_set_indexer.rb index a8d7a60e..b42e8900 100644 --- a/app/indexers/concerns/iiif_print/file_set_indexer.rb +++ b/app/indexers/concerns/iiif_print/file_set_indexer.rb @@ -35,7 +35,7 @@ def find_checksum(object) return unless file digest ||= if file.is_a?(Hyrax::FileMetadata) - file.checksum + Array.wrap(file.checksum).first else # file is a Hydra::PCDM::File (ActiveFedora) file.digest.first end @@ -48,8 +48,8 @@ def all_text(object) file = object.original_file return unless file - text = IiifPrint.config.all_text_generator_function.call(object: object) || '' - return text if text.empty? + text = IiifPrint.extract_text_for(file_set: object) + return text if text.blank? text.tr("\n", ' ').squeeze(' ') end diff --git a/app/services/iiif_print/pluggable_derivative_service.rb b/app/services/iiif_print/pluggable_derivative_service.rb index a909f677..1ebb8bf2 100644 --- a/app/services/iiif_print/pluggable_derivative_service.rb +++ b/app/services/iiif_print/pluggable_derivative_service.rb @@ -27,7 +27,11 @@ class IiifPrint::PluggableDerivativeService class_attribute :derivative_path_factory, default: Hyrax::DerivativePath def initialize(file_set, plugins: plugins_for(file_set)) - @file_set = file_set + @file_set = if file_set.is_a?(Hyrax::FileMetadata) + Hyrax.query_service.find_by(id: file_set.file_set_id) + else + file_set + end @plugins = Array.wrap(plugins) @valid_plugins = plugins.map { |plugin| plugin.new(file_set) }.select(&:valid?) end diff --git a/app/views/hyrax/file_sets/_actions.html.erb b/app/views/hyrax/file_sets/_actions.html.erb deleted file mode 100644 index e2f47cc5..00000000 --- a/app/views/hyrax/file_sets/_actions.html.erb +++ /dev/null @@ -1,46 +0,0 @@ -
- - - - -
diff --git a/lib/iiif_print.rb b/lib/iiif_print.rb index 1d187788..2d3f2f0f 100644 --- a/lib/iiif_print.rb +++ b/lib/iiif_print.rb @@ -52,8 +52,10 @@ class << self delegate( :clean_for_tests!, + :copy_derivatives_from_data_store, :create_relationship_between, :destroy_children_split_from, + :extract_text_for, :find_by, :find_by_title_for, :grandparent_for, diff --git a/lib/iiif_print/base_derivative_service.rb b/lib/iiif_print/base_derivative_service.rb index 9d5278b0..c027b48d 100644 --- a/lib/iiif_print/base_derivative_service.rb +++ b/lib/iiif_print/base_derivative_service.rb @@ -7,7 +7,11 @@ class BaseDerivativeService class_attribute :target_extension, default: nil def initialize(file_set) - @file_set = file_set + @file_set = if file_set.is_a?(Hyrax::FileMetadata) + Hyrax.query_service.find_by(id: file_set.file_set_id) + else + file_set + end @dest_path = nil @source_path = nil @source_meta = nil @@ -26,7 +30,10 @@ def initialize(file_set) # @return [Boolean] def valid? # @note We are taking a shortcut because currently we are only concerned about images. - file_set.class.image_mime_types.include?(file_set.mime_type) + # @TODO: verify if this works for ActiveFedora and if so, remove commented code. + # If not, modify to use adapter. + # file_set.class.image_mime_types.include?(file_set.mime_type) + file_set.original_file.image? end def derivative_path_factory @@ -110,5 +117,9 @@ def jp2_convert # intermediate -> PDF im_convert end + + def mime_type_for(extension) + Marcel::MimeType.for extension: extension + end end end diff --git a/lib/iiif_print/jp2_derivative_service.rb b/lib/iiif_print/jp2_derivative_service.rb index cb4baef3..325a18b1 100644 --- a/lib/iiif_print/jp2_derivative_service.rb +++ b/lib/iiif_print/jp2_derivative_service.rb @@ -53,7 +53,10 @@ def create_derivatives(filename) render_cmd = opj_command # Run the generated command to make derivative file at @dest_path - `#{render_cmd}` + data = `#{render_cmd}` + + # Create Hyrax::FileMetadata object for the derivatives (if Valkyrie) + IiifPrint.copy_derivatives_from_data_store(stream: data, directives: { url: file_set.id.to_s, container: 'service_file', mime_type: mime_type_for(target_extension) }) # Clean up any intermediate files or symlinks used during creation cleanup_intermediate diff --git a/lib/iiif_print/pdf_derivative_service.rb b/lib/iiif_print/pdf_derivative_service.rb index 0574dc53..81ff239d 100644 --- a/lib/iiif_print/pdf_derivative_service.rb +++ b/lib/iiif_print/pdf_derivative_service.rb @@ -24,7 +24,9 @@ def initialize(file_set) # JP2 source, and whether we have color or grayscale material. def convert_cmd template = use_color? ? COLOR_PDF_CMD : GRAY_PDF_CMD - format(template, source_file: @source_path, out_file: @dest_path) + data = format(template, source_file: @source_path, out_file: @dest_path) + IiifPrint.copy_derivatives_from_data_store(stream: data, directives: { url: file_set.id.to_s, container: 'service_file', mime_type: mime_type_for(target_extension) }) + data end def create_derivatives(filename) diff --git a/lib/iiif_print/persistence_layer.rb b/lib/iiif_print/persistence_layer.rb index aa10a12d..5ddd62d0 100644 --- a/lib/iiif_print/persistence_layer.rb +++ b/lib/iiif_print/persistence_layer.rb @@ -102,7 +102,15 @@ def self.save(object:) raise NotImplementedError, "#{self}.{__method__}" end - def index_works(objects:) + def self.index_works(objects:) + raise NotImplementedError, "#{self}.{__method__}" + end + + def self.copy_derivatives_from_data_store(stream:, directives:) + raise NotImplementedError, "#{self}.{__method__}" + end + + def self.extract_text_for(file_set:) raise NotImplementedError, "#{self}.{__method__}" end end diff --git a/lib/iiif_print/persistence_layer/active_fedora_adapter.rb b/lib/iiif_print/persistence_layer/active_fedora_adapter.rb index ccb484a3..6ff9bd07 100644 --- a/lib/iiif_print/persistence_layer/active_fedora_adapter.rb +++ b/lib/iiif_print/persistence_layer/active_fedora_adapter.rb @@ -165,6 +165,25 @@ def self.index_works(objects:) end true end + + ## + # does nothing for ActiveFedora; + # allows valkyrie works to have an extra step to create the Hyrax::Metadata objects. + # + # @param [] + # @return [TrueClass] + def self.copy_derivatives_from_data_store(*) + true + end + + ## + # Extract text from the derivatives + # + # @param [FileSet] an ActiveFedora fileset + # @return [String] Text from fileset's file + def self.extract_text_for(file_set:) + IiifPrint.config.all_text_generator_function.call(object: file_set) || '' + end end end end diff --git a/lib/iiif_print/persistence_layer/valkyrie_adapter.rb b/lib/iiif_print/persistence_layer/valkyrie_adapter.rb index d9187016..ecb1d321 100644 --- a/lib/iiif_print/persistence_layer/valkyrie_adapter.rb +++ b/lib/iiif_print/persistence_layer/valkyrie_adapter.rb @@ -154,6 +154,30 @@ def self.index_works(objects:) end true end + + ## + # Performs an extra step to create the Hyrax::Metadata objects + # for derivatives. + # + # @param [] + # @return [TrueClass] + def self.copy_derivatives_from_data_store(stream:, directives:) + Hyrax::ValkyriePersistDerivatives.call(stream, directives) + end + + ## + # Extract text from the derivatives + # + # @param [Hyrax::FileSet] a Valkyrie fileset + # @return [String] Text from fileset's file + def self.extract_text_for(file_set:) + fm = Hyrax.custom_queries.find_many_file_metadata_by_use(resource: file_set, + use: Hyrax::FileMetadata::Use.uri_for(use: :extracted_file)) + return if fm.empty? + text_fm = fm.find { |t| t.mime_type == Marcel::MimeType.for(extension: 'txt') } + return if text_fm.nil? + text_fm.content + end end end end diff --git a/lib/iiif_print/text_extraction_derivative_service.rb b/lib/iiif_print/text_extraction_derivative_service.rb index 7bfe3b16..6849dbc4 100644 --- a/lib/iiif_print/text_extraction_derivative_service.rb +++ b/lib/iiif_print/text_extraction_derivative_service.rb @@ -28,13 +28,15 @@ def create_derivatives_from_ocr(filename) ocr_derivatives.each do |extension, method_name| path = prepare_path(extension.to_s) - write(content: ocr.public_send(method_name), path: path) + write(content: ocr.public_send(method_name), path: path, extension: extension) end end - def write(content:, path:) + def write(content:, path:, extension:) + mime_type = mime_type_for(extension) File.open(path, 'w') do |outfile| outfile.write(content) + IiifPrint.copy_derivatives_from_data_store(stream: content, directives: { url: path, container: 'extracted_text', mime_type: mime_type }) end end diff --git a/lib/iiif_print/text_formats_from_alto_service.rb b/lib/iiif_print/text_formats_from_alto_service.rb index 7ab13408..1affecee 100644 --- a/lib/iiif_print/text_formats_from_alto_service.rb +++ b/lib/iiif_print/text_formats_from_alto_service.rb @@ -4,9 +4,10 @@ module IiifPrint # NOTE: to keep this from conflicting with TextExtractionDerivativeService, # this class should be invoked by it, not PluggableDerivativeService. class TextFormatsFromALTOService < BaseDerivativeService - self.target_extension = 'tiff'.freeze + self.target_extension = 'txt'.freeze def save_derivative(destination, data) + mime_type = mime_type_for(destination) # Load/prepare base of "pairtree" dir structure for extension, fileset prepare_path(destination) # @@ -17,6 +18,7 @@ def save_derivative(destination, data) # Write data as UTF-8 encoded text File.open(save_path, "w:UTF-8") do |f| f.write(data) + IiifPrint.copy_derivatives_from_data_store(stream: data, directives: { url: file_set.id.to_s, container: 'extracted_text', mime_type: mime_type }) end end diff --git a/lib/iiif_print/tiff_derivative_service.rb b/lib/iiif_print/tiff_derivative_service.rb index 0b1f6347..f3f88957 100644 --- a/lib/iiif_print/tiff_derivative_service.rb +++ b/lib/iiif_print/tiff_derivative_service.rb @@ -32,7 +32,9 @@ def convert_cmd source_path += '[0]' if @source_path.ends_with?('pdf') template = use_color? ? COLOR_CMD : GRAY_CMD template = MONO_CMD if one_bit? - format(template, source_file: source_path, out_file: @dest_path) + data = format(template, source_file: source_path, out_file: @dest_path) + IiifPrint.copy_derivatives_from_data_store(stream: data, directives: { url: file_set.id.to_s, container: 'service_file', mime_type: mime_type_for(target_extension) }) + data end def create_derivatives(filename) diff --git a/spec/iiif_print/base_derivative_service_spec.rb b/spec/iiif_print/base_derivative_service_spec.rb index 40c658b9..4f2d0fd5 100644 --- a/spec/iiif_print/base_derivative_service_spec.rb +++ b/spec/iiif_print/base_derivative_service_spec.rb @@ -3,18 +3,20 @@ RSpec.describe IiifPrint::BaseDerivativeService do let(:file_set) { double(FileSet) } let(:service) { described_class.new(file_set) } + let(:image_file) { double(image?: true) } + let(:other_file) { double(image?: false) } describe '#valid?' do subject { service.valid? } context 'when given an image file' do - let(:file_set) { double(FileSet, mime_type: 'image/tiff', class: FileSet) } + let(:file_set) { double(FileSet, mime_type: 'image/tiff', class: FileSet, original_file: image_file) } it { is_expected.to be_truthy } end context 'when given a non-image file' do - let(:file_set) { double(FileSet, mime_type: 'audio/mpeg', class: FileSet) } + let(:file_set) { double(FileSet, mime_type: 'audio/mpeg', class: FileSet, original_file: other_file) } it { is_expected.to be_falsey } end diff --git a/spec/services/iiif_print/pdf_derivative_service_spec.rb b/spec/services/iiif_print/pdf_derivative_service_spec.rb index aec72f33..e160312a 100644 --- a/spec/services/iiif_print/pdf_derivative_service_spec.rb +++ b/spec/services/iiif_print/pdf_derivative_service_spec.rb @@ -5,13 +5,17 @@ file_set.save!(validate: false) file_set end - + let(:image_file) { double(image?: true) } let(:fixture_path) do File.join( IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files' ) end + before do + allow(valid_file_set).to receive(:original_file).and_return(image_file) + end + describe "Creates PDF derivatives" do def source_image(name) File.join(fixture_path, name) diff --git a/spec/services/iiif_print/pluggable_derivative_service_spec.rb b/spec/services/iiif_print/pluggable_derivative_service_spec.rb index 6fc9d0cc..f7098c60 100644 --- a/spec/services/iiif_print/pluggable_derivative_service_spec.rb +++ b/spec/services/iiif_print/pluggable_derivative_service_spec.rb @@ -2,6 +2,7 @@ require 'spec_helper' RSpec.describe IiifPrint::PluggableDerivativeService do + let(:work) { MyWork.new } let(:persisted_file_set) do fs = FileSet.new work.title = ['This is a page!'] @@ -11,13 +12,17 @@ work.save!(validate: false) fs end - + let(:image_file) { double(image?: true) } let(:fixture_path) do File.join( IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files' ) end + before do + allow(persisted_file_set).to receive(:original_file).and_return(image_file) + end + describe "service registration" do # integration test with Hyrax, verify services is registered @@ -29,7 +34,8 @@ file_set = double(FileSet, class: FileSet, mime_type: 'application/pdf', - parent: MyIiifConfiguredWorkWithAllDerivativeServices.new) + parent: MyIiifConfiguredWorkWithAllDerivativeServices.new, + original_file: image_file) found = Hyrax::DerivativeService.for(file_set) expect(found).to be_a described_class end @@ -40,14 +46,13 @@ allow(persisted_file_set).to receive(:in_works).and_return([work]) end - let(:work) { MyWork.new } - describe "#plugins" do it "uses the default derivatives service" do file_set = double(FileSet, class: FileSet, mime_type: 'application/pdf', - parent: MyWork.new) + parent: MyWork.new, + original_file: image_file) service = described_class.new(file_set) expect(service.plugins).to eq [Hyrax::FileSetDerivativesService] end