Skip to content

Commit

Permalink
Valkyrie derivatives
Browse files Browse the repository at this point in the history
Creates Hyrax::FileMetadata objects for all valkyrie derivatives.
Access Valkyrie derivatives via Hyrax methods, rather than through the duplicated derivative logic in IiifPrint.

Note: this commit does not address the remote_url logic, which has not yet been converted to Valkyrie.
  • Loading branch information
LaRita Robinson committed May 14, 2024
1 parent cee9752 commit c3a11ac
Show file tree
Hide file tree
Showing 16 changed files with 111 additions and 67 deletions.
6 changes: 3 additions & 3 deletions app/indexers/concerns/iiif_print/file_set_indexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def find_checksum(object)
return unless file

digest ||= if file.is_a?(Hyrax::FileMetadata)
file.checksum
Array.wrap(file.checksum).first
else # file is a Hydra::PCDM::File (ActiveFedora)
file.digest.first
end
Expand All @@ -48,8 +48,8 @@ def all_text(object)
file = object.original_file
return unless file

text = IiifPrint.config.all_text_generator_function.call(object: object) || ''
return text if text.empty?
text = IiifPrint.extract_text_for(file_set: object)
return text if text.blank?

text.tr("\n", ' ').squeeze(' ')
end
Expand Down
6 changes: 5 additions & 1 deletion app/services/iiif_print/pluggable_derivative_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@ class IiifPrint::PluggableDerivativeService
class_attribute :derivative_path_factory, default: Hyrax::DerivativePath

def initialize(file_set, plugins: plugins_for(file_set))
@file_set = file_set
@file_set = if file_set.is_a?(Hyrax::FileMetadata)
Hyrax.query_service.find_by(id: file_set.file_set_id)
else
file_set
end
@plugins = Array.wrap(plugins)
@valid_plugins = plugins.map { |plugin| plugin.new(file_set) }.select(&:valid?)
end
Expand Down
46 changes: 0 additions & 46 deletions app/views/hyrax/file_sets/_actions.html.erb

This file was deleted.

2 changes: 2 additions & 0 deletions lib/iiif_print.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,10 @@ class << self

delegate(
:clean_for_tests!,
:copy_derivatives_from_data_store,
:create_relationship_between,
:destroy_children_split_from,
:extract_text_for,
:find_by,
:find_by_title_for,
:grandparent_for,
Expand Down
15 changes: 13 additions & 2 deletions lib/iiif_print/base_derivative_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@ class BaseDerivativeService
class_attribute :target_extension, default: nil

def initialize(file_set)
@file_set = file_set
@file_set = if file_set.is_a?(Hyrax::FileMetadata)
Hyrax.query_service.find_by(id: file_set.file_set_id)
else
file_set
end
@dest_path = nil
@source_path = nil
@source_meta = nil
Expand All @@ -26,7 +30,10 @@ def initialize(file_set)
# @return [Boolean]
def valid?
# @note We are taking a shortcut because currently we are only concerned about images.
file_set.class.image_mime_types.include?(file_set.mime_type)
# @TODO: verify if this works for ActiveFedora and if so, remove commented code.
# If not, modify to use adapter.
# file_set.class.image_mime_types.include?(file_set.mime_type)
file_set.original_file.image?
end

def derivative_path_factory
Expand Down Expand Up @@ -110,5 +117,9 @@ def jp2_convert
# intermediate -> PDF
im_convert
end

# Looks up the MIME type registered for a file extension via Marcel.
#
# @param extension [String, Symbol] file extension handed to Marcel as-is
# @return [Object] whatever Marcel::MimeType.for returns for that extension
def mime_type_for(extension)
  Marcel::MimeType.for(extension: extension)
end
end
end
5 changes: 4 additions & 1 deletion lib/iiif_print/jp2_derivative_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@ def create_derivatives(filename)
render_cmd = opj_command

# Run the generated command to make derivative file at @dest_path
`#{render_cmd}`
data = `#{render_cmd}`

# Create Hyrax::FileMetadata object for the derivatives (if Valkyrie)
IiifPrint.copy_derivatives_from_data_store(stream: data, directives: { url: file_set.id.to_s, container: 'service_file', mime_type: mime_type_for(target_extension) })

# Clean up any intermediate files or symlinks used during creation
cleanup_intermediate
Expand Down
4 changes: 3 additions & 1 deletion lib/iiif_print/pdf_derivative_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ def initialize(file_set)
# JP2 source, and whether we have color or grayscale material.
def convert_cmd
template = use_color? ? COLOR_PDF_CMD : GRAY_PDF_CMD
format(template, source_file: @source_path, out_file: @dest_path)
data = format(template, source_file: @source_path, out_file: @dest_path)
IiifPrint.copy_derivatives_from_data_store(stream: data, directives: { url: file_set.id.to_s, container: 'service_file', mime_type: mime_type_for(target_extension) })
data
end

def create_derivatives(filename)
Expand Down
10 changes: 9 additions & 1 deletion lib/iiif_print/persistence_layer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,15 @@ def self.save(object:)
raise NotImplementedError, "#{self}.{__method__}"
end

def index_works(objects:)
##
# Abstract hook for indexing works; this base layer provides no implementation
# and expects a concrete adapter to define its own version.
#
# @param objects [Object] the works to index (unused here)
# @raise [NotImplementedError] always; the message names the receiver and this method
def self.index_works(objects:)
  # Both parts must be interpolated: the previous message emitted the literal
  # text "{__method__}" instead of the method name.
  raise NotImplementedError, "#{self}.#{__method__}"
end

##
# Abstract hook for copying derivative data into the persistence layer; this
# base layer provides no implementation and expects a concrete adapter to
# define its own version.
#
# @param stream [Object] derivative content (unused here)
# @param directives [Hash] options describing the derivative (unused here)
# @raise [NotImplementedError] always; the message names the receiver and this method
def self.copy_derivatives_from_data_store(stream:, directives:)
  # Both parts must be interpolated: the previous message emitted the literal
  # text "{__method__}" instead of the method name.
  raise NotImplementedError, "#{self}.#{__method__}"
end

##
# Abstract hook for extracting text from a file set; this base layer provides
# no implementation and expects a concrete adapter to define its own version.
#
# @param file_set [Object] the file set to extract text from (unused here)
# @raise [NotImplementedError] always; the message names the receiver and this method
def self.extract_text_for(file_set:)
  # Both parts must be interpolated: the previous message emitted the literal
  # text "{__method__}" instead of the method name.
  raise NotImplementedError, "#{self}.#{__method__}"
end
end
Expand Down
19 changes: 19 additions & 0 deletions lib/iiif_print/persistence_layer/active_fedora_adapter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,25 @@ def self.index_works(objects:)
end
true
end

##
# No-op for the ActiveFedora adapter: accepts any arguments and ignores them.
#
# The method exists so callers can invoke the same hook on either adapter;
# the Valkyrie adapter uses it for an extra step that creates the
# Hyrax::FileMetadata objects for derivatives.
#
# @param * [Array<Object>] any arguments; all are ignored
# @return [TrueClass] always true
def self.copy_derivatives_from_data_store(*)
true
end

##
# Produces the text for a file set by invoking the configured
# all_text_generator_function.
#
# @param file_set [FileSet] an ActiveFedora fileset
# @return [String] the generated text, or an empty string when the generator
#   returns nil or false
def self.extract_text_for(file_set:)
  generated = IiifPrint.config.all_text_generator_function.call(object: file_set)
  generated || ''
end
end
end
end
24 changes: 24 additions & 0 deletions lib/iiif_print/persistence_layer/valkyrie_adapter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,30 @@ def self.index_works(objects:)
end
true
end

##
# Performs an extra step to create the Hyrax::FileMetadata objects
# for derivatives by delegating to Hyrax::ValkyriePersistDerivatives.
#
# @param stream [Object] derivative content, forwarded unchanged as the first
#   argument (callers in this gem pass strings; TODO confirm the type Hyrax expects)
# @param directives [Hash] forwarded unchanged as the second argument; callers
#   in this gem supply :url, :container and :mime_type keys
# @return [Object] whatever Hyrax::ValkyriePersistDerivatives.call returns
def self.copy_derivatives_from_data_store(stream:, directives:)
Hyrax::ValkyriePersistDerivatives.call(stream, directives)
end

##
# Extract text from the derivatives.
#
# Queries the file metadata attached to the file set under the
# :extracted_file use and returns the content of the first entry whose
# mime type matches the one Marcel reports for the 'txt' extension.
#
# @param file_set [Hyrax::FileSet] a Valkyrie fileset
# @return [String] text from the fileset's file; an empty string when no
#   matching file metadata exists (or its content is nil), matching the
#   ActiveFedora adapter, which also returns '' rather than nil
def self.extract_text_for(file_set:)
  extracted_use = Hyrax::FileMetadata::Use.uri_for(use: :extracted_file)
  candidates = Hyrax.custom_queries.find_many_file_metadata_by_use(resource: file_set,
                                                                   use: extracted_use)
  # Resolve the target mime type once instead of once per candidate.
  plain_text = Marcel::MimeType.for(extension: 'txt')
  text_fm = candidates.find { |metadata| metadata.mime_type == plain_text }

  # `find` already yields nil for an empty collection, so no separate
  # emptiness guard is needed.
  text_fm&.content || ''
end
end
end
end
6 changes: 4 additions & 2 deletions lib/iiif_print/text_extraction_derivative_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,15 @@ def create_derivatives_from_ocr(filename)

ocr_derivatives.each do |extension, method_name|
path = prepare_path(extension.to_s)
write(content: ocr.public_send(method_name), path: path)
write(content: ocr.public_send(method_name), path: path, extension: extension)
end
end

def write(content:, path:)
# Writes derivative content to disk and hands the same content to the
# persistence layer via IiifPrint.copy_derivatives_from_data_store.
#
# @param content [String] the derivative text to write
# @param path [String] destination file path; also sent as the :url directive
# @param extension [String, Symbol] used to look up the derivative's mime type
# @return [Object] the result of IiifPrint.copy_derivatives_from_data_store
def write(content:, path:, extension:)
  directives = { url: path, container: 'extracted_text', mime_type: mime_type_for(extension) }

  File.open(path, 'w') do |io|
    io.write(content)
    # NOTE: the persistence hook runs while the file handle is still open.
    IiifPrint.copy_derivatives_from_data_store(stream: content, directives: directives)
  end
end

Expand Down
4 changes: 3 additions & 1 deletion lib/iiif_print/text_formats_from_alto_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ module IiifPrint
# NOTE: to keep this from conflicting with TextExtractionDerivativeService,
# this class should be invoked by it, not PluggableDerivativeService.
class TextFormatsFromALTOService < BaseDerivativeService
self.target_extension = 'tiff'.freeze
self.target_extension = 'txt'.freeze

def save_derivative(destination, data)
mime_type = mime_type_for(destination)
# Load/prepare base of "pairtree" dir structure for extension, fileset
prepare_path(destination)
#
Expand All @@ -17,6 +18,7 @@ def save_derivative(destination, data)
# Write data as UTF-8 encoded text
File.open(save_path, "w:UTF-8") do |f|
f.write(data)
IiifPrint.copy_derivatives_from_data_store(stream: data, directives: { url: file_set.id.to_s, container: 'extracted_text', mime_type: mime_type })
end
end

Expand Down
4 changes: 3 additions & 1 deletion lib/iiif_print/tiff_derivative_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ def convert_cmd
source_path += '[0]' if @source_path.ends_with?('pdf')
template = use_color? ? COLOR_CMD : GRAY_CMD
template = MONO_CMD if one_bit?
format(template, source_file: source_path, out_file: @dest_path)
data = format(template, source_file: source_path, out_file: @dest_path)
IiifPrint.copy_derivatives_from_data_store(stream: data, directives: { url: file_set.id.to_s, container: 'service_file', mime_type: mime_type_for(target_extension) })
data
end

def create_derivatives(filename)
Expand Down
6 changes: 4 additions & 2 deletions spec/iiif_print/base_derivative_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,20 @@
RSpec.describe IiifPrint::BaseDerivativeService do
let(:file_set) { double(FileSet) }
let(:service) { described_class.new(file_set) }
let(:image_file) { double(image?: true) }
let(:other_file) { double(image?: false) }

describe '#valid?' do
subject { service.valid? }

context 'when given an image file' do
let(:file_set) { double(FileSet, mime_type: 'image/tiff', class: FileSet) }
let(:file_set) { double(FileSet, mime_type: 'image/tiff', class: FileSet, original_file: image_file) }

it { is_expected.to be_truthy }
end

context 'when given a non-image file' do
let(:file_set) { double(FileSet, mime_type: 'audio/mpeg', class: FileSet) }
let(:file_set) { double(FileSet, mime_type: 'audio/mpeg', class: FileSet, original_file: other_file) }

it { is_expected.to be_falsey }
end
Expand Down
6 changes: 5 additions & 1 deletion spec/services/iiif_print/pdf_derivative_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@
file_set.save!(validate: false)
file_set
end

let(:image_file) { double(image?: true) }
let(:fixture_path) do
File.join(
IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
)
end

before do
allow(valid_file_set).to receive(:original_file).and_return(image_file)
end

describe "Creates PDF derivatives" do
def source_image(name)
File.join(fixture_path, name)
Expand Down
15 changes: 10 additions & 5 deletions spec/services/iiif_print/pluggable_derivative_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
require 'spec_helper'

RSpec.describe IiifPrint::PluggableDerivativeService do
let(:work) { MyWork.new }
let(:persisted_file_set) do
fs = FileSet.new
work.title = ['This is a page!']
Expand All @@ -11,13 +12,17 @@
work.save!(validate: false)
fs
end

let(:image_file) { double(image?: true) }
let(:fixture_path) do
File.join(
IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
)
end

before do
allow(persisted_file_set).to receive(:original_file).and_return(image_file)
end

describe "service registration" do
# integration test with Hyrax, verify services is registered

Expand All @@ -29,7 +34,8 @@
file_set = double(FileSet,
class: FileSet,
mime_type: 'application/pdf',
parent: MyIiifConfiguredWorkWithAllDerivativeServices.new)
parent: MyIiifConfiguredWorkWithAllDerivativeServices.new,
original_file: image_file)
found = Hyrax::DerivativeService.for(file_set)
expect(found).to be_a described_class
end
Expand All @@ -40,14 +46,13 @@
allow(persisted_file_set).to receive(:in_works).and_return([work])
end

let(:work) { MyWork.new }

describe "#plugins" do
it "uses the default derivatives service" do
file_set = double(FileSet,
class: FileSet,
mime_type: 'application/pdf',
parent: MyWork.new)
parent: MyWork.new,
original_file: image_file)
service = described_class.new(file_set)
expect(service.plugins).to eq [Hyrax::FileSetDerivativesService]
end
Expand Down

0 comments on commit c3a11ac

Please sign in to comment.