diff --git a/app/indexers/concerns/iiif_print/file_set_indexer.rb b/app/indexers/concerns/iiif_print/file_set_indexer.rb
index a8d7a60e..b42e8900 100644
--- a/app/indexers/concerns/iiif_print/file_set_indexer.rb
+++ b/app/indexers/concerns/iiif_print/file_set_indexer.rb
@@ -35,7 +35,7 @@ def find_checksum(object)
return unless file
digest ||= if file.is_a?(Hyrax::FileMetadata)
- file.checksum
+ Array.wrap(file.checksum).first
else # file is a Hydra::PCDM::File (ActiveFedora)
file.digest.first
end
@@ -48,8 +48,8 @@ def all_text(object)
file = object.original_file
return unless file
- text = IiifPrint.config.all_text_generator_function.call(object: object) || ''
- return text if text.empty?
+ text = IiifPrint.extract_text_for(file_set: object)
+ return text if text.blank?
text.tr("\n", ' ').squeeze(' ')
end
diff --git a/app/services/iiif_print/pluggable_derivative_service.rb b/app/services/iiif_print/pluggable_derivative_service.rb
index a909f677..1ebb8bf2 100644
--- a/app/services/iiif_print/pluggable_derivative_service.rb
+++ b/app/services/iiif_print/pluggable_derivative_service.rb
@@ -27,7 +27,11 @@ class IiifPrint::PluggableDerivativeService
class_attribute :derivative_path_factory, default: Hyrax::DerivativePath
+ ##
+ # @param file_set a file set, or a Hyrax::FileMetadata (which is resolved to its owning
+ # file set through its file_set_id before being stored)
+ # @param plugins [Array<Class>, Class] derivative service class(es) to consider; each one is
+ # instantiated with the file_set argument exactly as it was given
def initialize(file_set, plugins: plugins_for(file_set))
- @file_set = file_set
+ @file_set = if file_set.is_a?(Hyrax::FileMetadata)
+ Hyrax.query_service.find_by(id: file_set.file_set_id)
+ else
+ file_set
+ end
@plugins = Array.wrap(plugins)
- @valid_plugins = plugins.map { |plugin| plugin.new(file_set) }.select(&:valid?)
+ # Iterate the wrapped list so a single (non-Array) plugin class is handled as well.
+ @valid_plugins = @plugins.map { |plugin| plugin.new(file_set) }.select(&:valid?)
end
diff --git a/app/views/hyrax/file_sets/_actions.html.erb b/app/views/hyrax/file_sets/_actions.html.erb
deleted file mode 100644
index e2f47cc5..00000000
--- a/app/views/hyrax/file_sets/_actions.html.erb
+++ /dev/null
@@ -1,46 +0,0 @@
-
-
-
-
-
-
diff --git a/lib/iiif_print.rb b/lib/iiif_print.rb
index 1d187788..2d3f2f0f 100644
--- a/lib/iiif_print.rb
+++ b/lib/iiif_print.rb
@@ -52,8 +52,10 @@ class << self
delegate(
:clean_for_tests!,
+ :copy_derivatives_from_data_store,
:create_relationship_between,
:destroy_children_split_from,
+ :extract_text_for,
:find_by,
:find_by_title_for,
:grandparent_for,
diff --git a/lib/iiif_print/base_derivative_service.rb b/lib/iiif_print/base_derivative_service.rb
index 9d5278b0..c027b48d 100644
--- a/lib/iiif_print/base_derivative_service.rb
+++ b/lib/iiif_print/base_derivative_service.rb
@@ -7,7 +7,11 @@ class BaseDerivativeService
class_attribute :target_extension, default: nil
def initialize(file_set)
- @file_set = file_set
+ @file_set = if file_set.is_a?(Hyrax::FileMetadata)
+ Hyrax.query_service.find_by(id: file_set.file_set_id)
+ else
+ file_set
+ end
@dest_path = nil
@source_path = nil
@source_meta = nil
@@ -26,7 +30,10 @@ def initialize(file_set)
# @return [Boolean]
def valid?
# @note We are taking a shortcut because currently we are only concerned about images.
- file_set.class.image_mime_types.include?(file_set.mime_type)
+ # @TODO: verify if this works for ActiveFedora and if so, remove commented code.
+ # If not, modify to use adapter.
+ # file_set.class.image_mime_types.include?(file_set.mime_type)
+ # A file set without an original file cannot be an image: answer false instead of raising
+ # NoMethodError on nil.
+ file_set.original_file&.image? || false
end
def derivative_path_factory
@@ -110,5 +117,9 @@ def jp2_convert
# intermediate -> PDF
im_convert
end
+
+ ##
+ # Look up the MIME type for a file extension using Marcel.
+ #
+ # @param extension the file extension to look up (for example 'txt')
+ # @return the value produced by Marcel::MimeType.for for that extension
+ def mime_type_for(extension)
+ Marcel::MimeType.for(extension: extension)
+ end
end
end
diff --git a/lib/iiif_print/jp2_derivative_service.rb b/lib/iiif_print/jp2_derivative_service.rb
index cb4baef3..325a18b1 100644
--- a/lib/iiif_print/jp2_derivative_service.rb
+++ b/lib/iiif_print/jp2_derivative_service.rb
@@ -53,7 +53,10 @@ def create_derivatives(filename)
render_cmd = opj_command
# Run the generated command to make derivative file at @dest_path
- `#{render_cmd}`
+ data = `#{render_cmd}`
+
+ # Create Hyrax::FileMetadata object for the derivatives (if Valkyrie)
+ IiifPrint.copy_derivatives_from_data_store(stream: data, directives: { url: file_set.id.to_s, container: 'service_file', mime_type: mime_type_for(target_extension) })
# Clean up any intermediate files or symlinks used during creation
cleanup_intermediate
diff --git a/lib/iiif_print/pdf_derivative_service.rb b/lib/iiif_print/pdf_derivative_service.rb
index 0574dc53..81ff239d 100644
--- a/lib/iiif_print/pdf_derivative_service.rb
+++ b/lib/iiif_print/pdf_derivative_service.rb
@@ -24,7 +24,9 @@ def initialize(file_set)
# JP2 source, and whether we have color or grayscale material.
def convert_cmd
template = use_color? ? COLOR_PDF_CMD : GRAY_PDF_CMD
- format(template, source_file: @source_path, out_file: @dest_path)
+ # `data` is the command template filled in with the source and destination paths.
+ data = format(template, source_file: @source_path, out_file: @dest_path)
+ # NOTE(review): what is passed as `stream:` here is the command text, not the content of the
+ # generated file, and nothing in this method runs the command first. Confirm this is intended;
+ # the call may belong after the command has been executed.
+ IiifPrint.copy_derivatives_from_data_store(stream: data, directives: { url: file_set.id.to_s, container: 'service_file', mime_type: mime_type_for(target_extension) })
+ # The formatted command string remains the return value.
+ data
end
def create_derivatives(filename)
diff --git a/lib/iiif_print/persistence_layer.rb b/lib/iiif_print/persistence_layer.rb
index aa10a12d..5ddd62d0 100644
--- a/lib/iiif_print/persistence_layer.rb
+++ b/lib/iiif_print/persistence_layer.rb
@@ -102,7 +102,15 @@ def self.save(object:)
raise NotImplementedError, "#{self}.{__method__}"
end
- def index_works(objects:)
+ ##
+ # Abstract placeholder for indexing.
+ #
+ # @param objects the objects to index
+ # @raise [NotImplementedError] always; the message names the receiver and this method
+ def self.index_works(objects:)
+ # "#{__method__}" (with the leading '#') interpolates the method name into the message.
+ raise NotImplementedError, "#{self}.#{__method__}"
+ end
+
+ ##
+ # Abstract placeholder for copying a derivative out of the data store.
+ #
+ # @param stream the derivative content
+ # @param directives [Hash] options describing the derivative
+ # @raise [NotImplementedError] always; the message names the receiver and this method
+ def self.copy_derivatives_from_data_store(stream:, directives:)
+ # "#{__method__}" (with the leading '#') interpolates the method name into the message.
+ raise NotImplementedError, "#{self}.#{__method__}"
+ end
+
+ ##
+ # Abstract placeholder for extracting text for a file set.
+ #
+ # @param file_set the file set to extract text for
+ # @raise [NotImplementedError] always; the message names the receiver and this method
+ def self.extract_text_for(file_set:)
- raise NotImplementedError, "#{self}.{__method__}"
+ # The previous text lacked the '#', so it printed a literal "{__method__}".
+ raise NotImplementedError, "#{self}.#{__method__}"
end
end
diff --git a/lib/iiif_print/persistence_layer/active_fedora_adapter.rb b/lib/iiif_print/persistence_layer/active_fedora_adapter.rb
index ccb484a3..6ff9bd07 100644
--- a/lib/iiif_print/persistence_layer/active_fedora_adapter.rb
+++ b/lib/iiif_print/persistence_layer/active_fedora_adapter.rb
@@ -165,6 +165,25 @@ def self.index_works(objects:)
end
true
end
+
+ ##
+ # Does nothing for ActiveFedora. The method exists so that callers can invoke the same
+ # adapter method for Valkyrie works, where an extra step creates the Hyrax::FileMetadata
+ # objects for derivatives.
+ #
+ # Any arguments are accepted and ignored.
+ #
+ # @return [TrueClass] always true
+ def self.copy_derivatives_from_data_store(*)
+ true
+ end
+
+ ##
+ # Extract text for a file set by calling the configured
+ # IiifPrint.config.all_text_generator_function with the file set as `object:`.
+ #
+ # @param file_set [FileSet] an ActiveFedora fileset
+ # @return [String] the generator's result, or '' when the generator returns nil or false
+ def self.extract_text_for(file_set:)
+ IiifPrint.config.all_text_generator_function.call(object: file_set) || ''
+ end
end
end
end
diff --git a/lib/iiif_print/persistence_layer/valkyrie_adapter.rb b/lib/iiif_print/persistence_layer/valkyrie_adapter.rb
index d9187016..ecb1d321 100644
--- a/lib/iiif_print/persistence_layer/valkyrie_adapter.rb
+++ b/lib/iiif_print/persistence_layer/valkyrie_adapter.rb
@@ -154,6 +154,30 @@ def self.index_works(objects:)
end
true
end
+
+ ##
+ # Performs an extra step to create the Hyrax::FileMetadata objects
+ # for derivatives, by delegating to Hyrax::ValkyriePersistDerivatives.
+ #
+ # @param stream the derivative content, passed through unchanged
+ # NOTE(review): presumably Hyrax expects an IO-like object here; confirm against
+ # Hyrax::ValkyriePersistDerivatives before passing a String.
+ # @param directives [Hash] options passed through unchanged
+ # @return whatever Hyrax::ValkyriePersistDerivatives.call returns
+ def self.copy_derivatives_from_data_store(stream:, directives:)
+ Hyrax::ValkyriePersistDerivatives.call(stream, directives)
+ end
+
+ ##
+ # Extract text from the derivatives
+ #
+ # Looks through the file set's file metadata with the "extracted file" use and picks the
+ # first entry whose mime type is the one Marcel reports for the 'txt' extension.
+ #
+ # @param file_set [Hyrax::FileSet] a Valkyrie fileset
+ # @return [String] content of that file metadata, or '' when no such entry exists
+ def self.extract_text_for(file_set:)
+ # Resolve the plain-text mime type once instead of once per candidate.
+ text_mime_type = Marcel::MimeType.for(extension: 'txt')
+ candidates = Hyrax.custom_queries.find_many_file_metadata_by_use(resource: file_set,
+ use: Hyrax::FileMetadata::Use.uri_for(use: :extracted_file))
+ # `find` on an empty collection is nil, so no separate emptiness check is needed.
+ text_fm = candidates.find { |candidate| candidate.mime_type == text_mime_type }
+ # Honour the documented String return: fall back to '' rather than nil.
+ text_fm&.content || ''
+ end
end
end
end
diff --git a/lib/iiif_print/text_extraction_derivative_service.rb b/lib/iiif_print/text_extraction_derivative_service.rb
index 7bfe3b16..6849dbc4 100644
--- a/lib/iiif_print/text_extraction_derivative_service.rb
+++ b/lib/iiif_print/text_extraction_derivative_service.rb
@@ -28,13 +28,15 @@ def create_derivatives_from_ocr(filename)
ocr_derivatives.each do |extension, method_name|
path = prepare_path(extension.to_s)
- write(content: ocr.public_send(method_name), path: path)
+ write(content: ocr.public_send(method_name), path: path, extension: extension)
end
end
- def write(content:, path:)
+ ##
+ # Write content to path, then hand the same content to
+ # IiifPrint.copy_derivatives_from_data_store.
+ #
+ # @param content the text to write
+ # @param path the destination file path; also passed on as the :url directive
+ # @param extension the file extension, used to look up the mime type via mime_type_for
+ def write(content:, path:, extension:)
+ mime_type = mime_type_for(extension)
File.open(path, 'w') do |outfile|
outfile.write(content)
end
+ # Called after the block so the file has been flushed and closed first.
+ IiifPrint.copy_derivatives_from_data_store(stream: content, directives: { url: path, container: 'extracted_text', mime_type: mime_type })
end
diff --git a/lib/iiif_print/text_formats_from_alto_service.rb b/lib/iiif_print/text_formats_from_alto_service.rb
index 7ab13408..1affecee 100644
--- a/lib/iiif_print/text_formats_from_alto_service.rb
+++ b/lib/iiif_print/text_formats_from_alto_service.rb
@@ -4,9 +4,10 @@ module IiifPrint
# NOTE: to keep this from conflicting with TextExtractionDerivativeService,
# this class should be invoked by it, not PluggableDerivativeService.
class TextFormatsFromALTOService < BaseDerivativeService
- self.target_extension = 'tiff'.freeze
+ self.target_extension = 'txt'.freeze
def save_derivative(destination, data)
+ mime_type = mime_type_for(destination)
# Load/prepare base of "pairtree" dir structure for extension, fileset
prepare_path(destination)
#
@@ -17,6 +18,7 @@ def save_derivative(destination, data)
# Write data as UTF-8 encoded text
File.open(save_path, "w:UTF-8") do |f|
f.write(data)
+ IiifPrint.copy_derivatives_from_data_store(stream: data, directives: { url: file_set.id.to_s, container: 'extracted_text', mime_type: mime_type })
end
end
diff --git a/lib/iiif_print/tiff_derivative_service.rb b/lib/iiif_print/tiff_derivative_service.rb
index 0b1f6347..f3f88957 100644
--- a/lib/iiif_print/tiff_derivative_service.rb
+++ b/lib/iiif_print/tiff_derivative_service.rb
@@ -32,7 +32,9 @@ def convert_cmd
source_path += '[0]' if @source_path.ends_with?('pdf')
template = use_color? ? COLOR_CMD : GRAY_CMD
template = MONO_CMD if one_bit?
- format(template, source_file: source_path, out_file: @dest_path)
+ data = format(template, source_file: source_path, out_file: @dest_path)
+ IiifPrint.copy_derivatives_from_data_store(stream: data, directives: { url: file_set.id.to_s, container: 'service_file', mime_type: mime_type_for(target_extension) })
+ data
end
def create_derivatives(filename)
diff --git a/spec/iiif_print/base_derivative_service_spec.rb b/spec/iiif_print/base_derivative_service_spec.rb
index 40c658b9..4f2d0fd5 100644
--- a/spec/iiif_print/base_derivative_service_spec.rb
+++ b/spec/iiif_print/base_derivative_service_spec.rb
@@ -3,18 +3,20 @@
RSpec.describe IiifPrint::BaseDerivativeService do
let(:file_set) { double(FileSet) }
let(:service) { described_class.new(file_set) }
+ let(:image_file) { double(image?: true) }
+ let(:other_file) { double(image?: false) }
describe '#valid?' do
subject { service.valid? }
context 'when given an image file' do
- let(:file_set) { double(FileSet, mime_type: 'image/tiff', class: FileSet) }
+ let(:file_set) { double(FileSet, mime_type: 'image/tiff', class: FileSet, original_file: image_file) }
it { is_expected.to be_truthy }
end
context 'when given a non-image file' do
- let(:file_set) { double(FileSet, mime_type: 'audio/mpeg', class: FileSet) }
+ let(:file_set) { double(FileSet, mime_type: 'audio/mpeg', class: FileSet, original_file: other_file) }
it { is_expected.to be_falsey }
end
diff --git a/spec/services/iiif_print/pdf_derivative_service_spec.rb b/spec/services/iiif_print/pdf_derivative_service_spec.rb
index aec72f33..e160312a 100644
--- a/spec/services/iiif_print/pdf_derivative_service_spec.rb
+++ b/spec/services/iiif_print/pdf_derivative_service_spec.rb
@@ -5,13 +5,17 @@
file_set.save!(validate: false)
file_set
end
-
+ let(:image_file) { double(image?: true) }
let(:fixture_path) do
File.join(
IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
)
end
+ before do
+ allow(valid_file_set).to receive(:original_file).and_return(image_file)
+ end
+
describe "Creates PDF derivatives" do
def source_image(name)
File.join(fixture_path, name)
diff --git a/spec/services/iiif_print/pluggable_derivative_service_spec.rb b/spec/services/iiif_print/pluggable_derivative_service_spec.rb
index 6fc9d0cc..f7098c60 100644
--- a/spec/services/iiif_print/pluggable_derivative_service_spec.rb
+++ b/spec/services/iiif_print/pluggable_derivative_service_spec.rb
@@ -2,6 +2,7 @@
require 'spec_helper'
RSpec.describe IiifPrint::PluggableDerivativeService do
+ let(:work) { MyWork.new }
let(:persisted_file_set) do
fs = FileSet.new
work.title = ['This is a page!']
@@ -11,13 +12,17 @@
work.save!(validate: false)
fs
end
-
+ let(:image_file) { double(image?: true) }
let(:fixture_path) do
File.join(
IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
)
end
+ before do
+ allow(persisted_file_set).to receive(:original_file).and_return(image_file)
+ end
+
describe "service registration" do
# integration test with Hyrax, verify services is registered
@@ -29,7 +34,8 @@
file_set = double(FileSet,
class: FileSet,
mime_type: 'application/pdf',
- parent: MyIiifConfiguredWorkWithAllDerivativeServices.new)
+ parent: MyIiifConfiguredWorkWithAllDerivativeServices.new,
+ original_file: image_file)
found = Hyrax::DerivativeService.for(file_set)
expect(found).to be_a described_class
end
@@ -40,14 +46,13 @@
allow(persisted_file_set).to receive(:in_works).and_return([work])
end
- let(:work) { MyWork.new }
-
describe "#plugins" do
it "uses the default derivatives service" do
file_set = double(FileSet,
class: FileSet,
mime_type: 'application/pdf',
- parent: MyWork.new)
+ parent: MyWork.new,
+ original_file: image_file)
service = described_class.new(file_set)
expect(service.plugins).to eq [Hyrax::FileSetDerivativesService]
end