Skip to content

Commit

Permalink
valkyrize file_actor adding file_node to hold metadata_node values fr…
Browse files Browse the repository at this point in the history
…om active fedora

Remaining Work:
* in wings persister - implement save_file_node
* provide means to convert back and forth from resource file_node and AF metadata_node
  • Loading branch information
elrayle committed Apr 12, 2019
1 parent a13b963 commit 4c18544
Show file tree
Hide file tree
Showing 11 changed files with 645 additions and 124 deletions.
85 changes: 72 additions & 13 deletions app/actors/hyrax/actors/file_actor.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
require 'wings/services/file_node_builder'

module Hyrax
module Actors
# Actions for a file identified by file_set and relation (maps to use predicate)
Expand All @@ -8,10 +10,11 @@ class FileActor
# @param [FileSet] file_set the parent FileSet
# @param [Symbol, #to_sym] relation the type/use for the file
# @param [User] user the user to record as the Agent acting upon the file
def initialize(file_set, relation, user)
def initialize(file_set, relation, user, use_valkyrie: false)
@file_set = file_set
@relation = relation.to_sym
@relation = normalize_relation(relation, use_valkyrie: use_valkyrie)
@user = user
@use_valkyrie = use_valkyrie
end

# Persists file as part of file_set and spawns async job to characterize and create derivatives.
Expand All @@ -21,16 +24,7 @@ def initialize(file_set, relation, user)
# @see IngestJob
# @todo create a job to monitor the temp directory (or in a multi-worker system, directories!) to prune old files that have made it into the repo
def ingest_file(io)
# Skip versioning because versions will be minted by VersionCommitter as necessary during save_characterize_and_record_committer.
Hydra::Works::AddFileToFileSet.call(file_set,
io,
relation,
versioning: false)
return false unless file_set.save
repository_file = related_file
Hyrax::VersioningService.create(repository_file, user)
pathhint = io.uploaded_file.uploader.path if io.uploaded_file # in case next worker is on same filesystem
CharacterizeJob.perform_later(file_set, repository_file.id, pathhint || io.path)
perform_ingest_file(io, use_valkyrie: @use_valkyrie)
end

# Reverts file and spawns async job to characterize and create derivatives.
Expand All @@ -55,7 +49,72 @@ def ==(other)

# @return [Hydra::PCDM::File] the file referenced by relation
def related_file
file_set.public_send(relation) || raise("No #{relation} returned for FileSet #{file_set.id}")
file_set.public_send(normalize_relation(relation)) || raise("No #{relation} returned for FileSet #{file_set.id}")
end

# Persists file as part of file_set and records a new version.
# Also spawns an async job to characterize and create derivatives.
# @param [JobIoWrapper] io the file to save in the repository, with mime_type and original_name
# @return [FileNode, FalseClass] the created file node on success, false on failure
# @todo create a job to monitor the temp directory (or in a multi-worker system, directories!) to prune old files that have made it into the repo
def perform_ingest_file(io, use_valkyrie: false)
  # Route the ingest to the Valkyrie or the ActiveFedora implementation.
  if use_valkyrie
    perform_ingest_file_through_valkyrie(io)
  else
    perform_ingest_file_through_active_fedora(io)
  end
end

# Adds the file to the file set through ActiveFedora, records a version and
# queues characterization.
# @param [JobIoWrapper] io the file to save in the repository
# @return [FalseClass, Object] false when the file set fails to save, otherwise
#   whatever CharacterizeJob.perform_later returns
def perform_ingest_file_through_active_fedora(io)
  # versioning: false because the version is created explicitly below through
  # Hyrax::VersioningService once the file set has been saved.
  Hydra::Works::AddFileToFileSet.call(file_set, io, relation, versioning: false)
  return false unless file_set.save

  stored_file = related_file
  Hyrax::VersioningService.create(stored_file, user)
  # Hint for the next worker in case it shares this filesystem.
  local_path = io.uploaded_file ? io.uploaded_file.uploader.path : nil
  CharacterizeJob.perform_later(file_set, stored_file.id, local_path || io.path)
end

# Builds a file node from +io+, persists it with the configured Valkyrie
# storage adapter and metadata persister, then records a version for it.
# @param [JobIoWrapper] io must respond to #to_file_node and #file
# @return [Object, FalseClass] the node returned by the builder, or false when
#   persisting the node raised a StandardError (the error message is logged)
def perform_ingest_file_through_valkyrie(io)
  storage = Valkyrie.config.storage_adapter
  metadata_persister = Valkyrie.config.metadata_adapter.persister # TODO: Explore why valkyrie6 branch used indexing_persister adapter for this
  builder = Wings::FileNodeBuilder.new(storage_adapter: storage, persister: metadata_persister)

  node = io.to_file_node
  node.use = relation

  # Only a failure while persisting the node is rescued; errors raised by the
  # versioning call below still propagate to the caller.
  persisted_node =
    begin
      builder.create(file: io.file, node: node, file_set: file_set)
    rescue StandardError => error # Handle error persisting file node
      Rails.logger.error("Failed to save file_node through valkyrie: #{error.message}")
      return false
    end

  Hyrax::VersioningService.create(persisted_node, user)
  persisted_node
end

# Converts +relation+ to the form expected by the selected backend.
# @param [Object] relation the type/use for the file
# @param [Boolean] use_valkyrie selects the Valkyrie normalizer when truthy
# @return [Object] the result of the backend-specific normalizer
def normalize_relation(relation, use_valkyrie: false)
  if use_valkyrie
    normalize_relation_for_valkyrie(relation)
  else
    normalize_relation_for_active_fedora(relation)
  end
end

# Normalizes a relation to the Symbol form used with the ActiveFedora file set.
#
# Symbols are returned unchanged and anything responding to #to_sym (e.g. a
# String) is converted with it. Any other object is compared, through #to_s and
# ignoring case, against the PCDM use URIs.
# @param [Symbol, #to_sym, #to_s] relation the type/use for the file
# @return [Symbol] the normalized relation; :original_file when nothing matches
def normalize_relation_for_active_fedora(relation)
  return relation if relation.is_a? Symbol
  return relation.to_sym if relation.respond_to? :to_sym

  # TODO: wherever these are set, they should use Valkyrie::Vocab::PCDMUse... making the casecmp unnecessary
  # String#casecmp returns 0 on a match (-1, 1 or nil otherwise). 0 is truthy
  # in Ruby, so the result has to be tested explicitly; using it directly as
  # the condition made the first guard fire for every URI.
  # NOTE(review): confirm the snake_case term names used here exist in
  # Valkyrie::Vocab::PCDMUse.
  use_uri = relation.to_s
  return :original_file if use_uri.casecmp(Valkyrie::Vocab::PCDMUse.original_file.to_s)&.zero?
  return :extracted_file if use_uri.casecmp(Valkyrie::Vocab::PCDMUse.extracted_file.to_s)&.zero?
  return :thumbnail_file if use_uri.casecmp(Valkyrie::Vocab::PCDMUse.thumbnail_file.to_s)&.zero?
  :original_file # TODO: This should never happen. What should be done if none of the other conditions are met?
end

# Normalizes a relation to the PCDM use term used on the Valkyrie side.
#
# An RDF::URI is returned unchanged; anything else is converted with #to_sym
# and mapped to the matching Valkyrie::Vocab::PCDMUse term.
# @param [RDF::URI, #to_sym] relation the type/use for the file
# @return [Object] the relation itself (RDF::URI) or the mapped PCDMUse term;
#   unknown names fall back to the original_file term
def normalize_relation_for_valkyrie(relation)
  return relation if relation.is_a? RDF::URI

  case relation.to_sym
  when :extracted_file
    Valkyrie::Vocab::PCDMUse.extracted_file
  when :thumbnail_file
    Valkyrie::Vocab::PCDMUse.thumbnail_file
  else
    # Covers :original_file as well as any unrecognised name.
    Valkyrie::Vocab::PCDMUse.original_file # TODO: This should never happen. What should be done if none of the other conditions are met?
  end
end
end
end
Expand Down
31 changes: 23 additions & 8 deletions app/models/job_io_wrapper.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
require 'wings/models/file_node'
require 'wings/valkyrie/query_service'

# Primarily for jobs like IngestJob to revivify an equivalent FileActor to one that existed on
# the caller's side of an asynchronous Job invocation. This involves providing slots
# for the metadata that might travel w/ the actor's various supported types of @file.
Expand Down Expand Up @@ -56,18 +59,36 @@ def mime_type
super || extracted_mime_type
end

def file_set
FileSet.find(file_set_id)
def file_set(use_valkyrie: false)
return FileSet.find(file_set_id) unless use_valkyrie
adapter = Valkyrie.config.metadata_adapter
query_service = Wings::Valkyrie::QueryService.new(adapter: adapter)
query_service.find_by(id: Valkyrie::ID.new(file_set_id))
# TODO: At least temporarily, should this return the valkyrie resource version of the fileset or the active fedora fileset?
end

# Builds a FileActor for this wrapper's file set, relation and user.
# @return [Hyrax::Actors::FileActor]
def file_actor
  parent = file_set
  use = relation.to_sym
  Hyrax::Actors::FileActor.new(parent, use, user)
end

# @return [FileNode, FalseClass] the created file node on success, false on failure
def ingest_file
  # Hand this wrapper itself to the actor as the io to ingest.
  actor = file_actor
  actor.ingest_file(self)
end

# Builds a new Wings::FileNode describing this wrapper's file. The original
# name is used for both the label and the original filename, and the node is
# given the PCDM OriginalFile use.
# @return [Wings::FileNode]
def to_file_node
  attributes = {
    label: original_name,
    original_filename: original_name,
    mime_type: mime_type,
    use: [Valkyrie::Vocab::PCDMUse.OriginalFile]
  }
  Wings::FileNode.new(attributes)
end

# The magic that switches *once* between local filepath and CarrierWave file
# @return [File, StringIO, #read] File-like object ready to #read
def file
  # Memoized: the path/upload lookup only runs until a truthy file is found.
  return @file if @file

  @file = file_from_path || file_from_uploaded_file!
end

private

def extracted_original_name
Expand All @@ -80,12 +101,6 @@ def extracted_mime_type
uploaded_file ? uploaded_file.uploader.content_type : Hydra::PCDM::GetMimeTypeForFile.call(original_name)
end

# The magic that switches *once* between local filepath and CarrierWave file
# @return [File, StringIO, #read] File-like object ready to #read
def file
@file ||= (file_from_path || file_from_uploaded_file!)
end

# @return [File, StringIO] depending on CarrierWave configuration
# @raise when uploaded_file *becomes* required but is missing
def file_from_uploaded_file!
Expand Down
81 changes: 62 additions & 19 deletions app/services/hyrax/versioning_service.rb
Original file line number Diff line number Diff line change
@@ -1,26 +1,69 @@
require 'wings/models/file_node'
require 'wings/services/file_node_builder'

module Hyrax
class VersioningService
# Make a version and record the version committer
# @param [ActiveFedora::File] content
# @param [User, String] user
def self.create(content, user = nil)
content.create_version
record_committer(content, user) if user
end
class << self
# Make a version and record the version committer
# @param [ActiveFedora::File | Wings::FileNode] content
# @param [User, String] user
def create(content, user = nil)
  # A Wings::FileNode is versioned through Valkyrie; anything else through ActiveFedora.
  perform_create(content, user, content.is_a?(Wings::FileNode))
end

# @param [ActiveFedora::File] file
def self.latest_version_of(file)
file.versions.last
end
# @param [ActiveFedora::File | Wings::FileNode] file
def latest_version_of(file)
  # The newest version is the last entry of the file's versions.
  all_versions = file.versions
  all_versions.last
end

# Record the version committer of the last version
# @param [ActiveFedora::File | Wings::FileNode] content
# @param [User, String] user_key
def record_committer(content, user_key)
  # Accept either a user-like object or a bare login string.
  committer = user_key.respond_to?(:user_key) ? user_key.user_key : user_key
  latest = latest_version_of(content)
  return if latest.nil?

  # File nodes are identified by the version's id; other content by its uri.
  identifier =
    if content.is_a?(Wings::FileNode)
      latest.id.to_s
    else
      latest.uri
    end
  Hyrax::VersionCommitter.create(version_id: identifier, committer_login: committer)
end

# TODO: Copied from valkyrie6 branch. Need to explore whether this is needed?
# # @param [FileSet] file_set
# # @param [Wings::FileNode] content
# # @param [String] revision_id
# # @param [User, String] user
# def restore_version(file_set, content, revision_id, user = nil)
# found_version = content.versions.find { |x| x.label == Array.wrap(revision_id) }
# return unless found_version
# node = Wings::FileNodeBuilder.new(storage_adapter: nil, persister: indexing_adapter.persister).attach_file_node(node: found_version, file_set: file_set)
# create(node, user)
# end

private

# # TODO: Should we create and use indexing adapter for persistence? This is what was used in branch valkyrie6.
# def indexing_adapter
# Valkyrie::MetadataAdapter.find(:indexing_persister)
# end

# Dispatches version creation to the Valkyrie or ActiveFedora implementation.
# @param [ActiveFedora::File | Wings::FileNode] content
# @param [User, String] user
# @param [Boolean] use_valkyrie
def perform_create(content, user, use_valkyrie)
  if use_valkyrie
    perform_create_through_valkyrie(content, user)
  else
    perform_create_through_active_fedora(content, user)
  end
end

# Creates a version on the content and, when a user is given, records that
# user as the committer of the version.
# @param [ActiveFedora::File] content
# @param [User, String] user
def perform_create_through_active_fedora(content, user)
  content.create_version
  return unless user

  record_committer(content, user)
end

# Record the version committer of the last version
# @param [ActiveFedora::File] content
# @param [User, String] user_key
def self.record_committer(content, user_key)
user_key = user_key.user_key if user_key.respond_to?(:user_key)
version = latest_version_of(content)
return if version.nil?
VersionCommitter.create(version_id: version.uri, committer_login: user_key)
# Records a new version on a file node, saves the node through the configured
# Valkyrie metadata adapter and, when a user is given, records the committer.
# @param [Wings::FileNode] content
# @param [User, String] user
# @return [Object, nil] the result of record_committer, or nil without a user
def perform_create_through_valkyrie(content, user)
  # This class defines no `indexing_adapter` helper (its definition is
  # commented out), so calling it raised NameError. Use the persister of the
  # configured metadata adapter instead.
  # TODO: Decide whether a dedicated indexing adapter should be used here, as
  # in branch valkyrie6.
  persister = Valkyrie.config.metadata_adapter.persister
  new_version = content.new(id: nil)
  new_version.label = "version#{content.member_ids.length + 1}"
  # NOTE(review): new_version is never saved, so its id is presumably still nil
  # when it is appended below; confirm whether it must be persisted first.
  content.member_ids = content.member_ids + [new_version.id]
  content = persister.save(resource: content)
  record_committer(content, user) if user
end
end
end
end
2 changes: 2 additions & 0 deletions lib/wings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@ module Wings; end
# Register the Wings metadata adapter under the :wings_adapter key ...
Valkyrie::MetadataAdapter.register(
Wings::Valkyrie::MetadataAdapter.new, :wings_adapter
)
# ... and select it as the configured Valkyrie metadata adapter.
Valkyrie.config.metadata_adapter = :wings_adapter

# Register a Fedora storage adapter under the :fedora key, connecting through
# an Ldp client built from the ActiveFedora fedora host ...
Valkyrie::StorageAdapter.register(
Valkyrie::Storage::Fedora
.new(connection: Ldp::Client.new(ActiveFedora.fedora.host)),
:fedora
)
# ... and select it as the configured Valkyrie storage adapter.
Valkyrie.config.storage_adapter = :fedora
82 changes: 82 additions & 0 deletions lib/wings/models/file_node.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# frozen_string_literal: true

module Wings
  # Valkyrie resource holding the metadata for a single stored file: its label,
  # mime type, dimensions, checksum, size, storage identifiers, PCDM use and
  # the ids of its member (version) nodes.
  class FileNode < ::Valkyrie::Resource
    # TODO: Branch valkyrie6 included the valkyrie resource access controls. Including this now causes an exception.
    # Need to explore whether this line should be uncommented.
    # include ::Valkyrie::Resource::AccessControls
    attribute :id, ::Valkyrie::Types::ID.optional
    attribute :label, ::Valkyrie::Types::Set
    attribute :mime_type, ::Valkyrie::Types::Set
    attribute :format_label, ::Valkyrie::Types::Set # e.g. "JPEG Image"
    attribute :height, ::Valkyrie::Types::Set
    attribute :width, ::Valkyrie::Types::Set
    attribute :checksum, ::Valkyrie::Types::Set
    attribute :size, ::Valkyrie::Types::Set
    attribute :original_filename, ::Valkyrie::Types::Set
    attribute :file_identifiers, ::Valkyrie::Types::Set
    attribute :use, ::Valkyrie::Types::Set
    attribute :member_ids, ::Valkyrie::Types::Set

    class << self
      # Builds a node from an uploaded file. The file's own +use+ is kept when
      # it provides one; otherwise the node is marked as an original file.
      # @param [ActionDispatch::Http::UploadedFile] file
      # @return [Wings::FileNode]
      def for(file:)
        new(
          label: file.original_filename,
          original_filename: file.original_filename,
          mime_type: file.content_type,
          use: file.try(:use) || [::Valkyrie::Vocab::PCDMUse.OriginalFile]
        )
      end
    end

    # @return [Boolean] whether the node's use includes PCDMUse.OriginalFile.
    def original_file?
      used_as?(::Valkyrie::Vocab::PCDMUse.OriginalFile)
    end

    # @return [Boolean] whether the node's use includes PCDMUse.ThumbnailImage.
    def thumbnail_file?
      used_as?(::Valkyrie::Vocab::PCDMUse.ThumbnailImage)
    end

    # @return [Boolean] whether the node's use includes PCDMUse.ExtractedImage.
    def extracted_file?
      used_as?(::Valkyrie::Vocab::PCDMUse.ExtractedImage)
    end

    # The title of a file node is its label.
    def title
      label
    end

    # The download id of a file node is its id.
    def download_id
      id
    end

    # @return [Boolean] whether this instance is a Wings::FileNode.
    def file_node?
      true
    end

    # @return [Boolean] whether this instance is a Hydra::Works FileSet.
    def file_set?
      false
    end

    # @return [Boolean] whether this instance is a Hydra::Works Generic Work.
    def work?
      false
    end

    # @return [Boolean] whether this instance is a Hydra::Works Collection.
    def collection?
      false
    end

    # Asks the stored file to validate itself against the first recorded size
    # and the sha256 of the first recorded checksum.
    def valid?
      stored_file = file
      stored_file.valid?(size: size.first, digests: { sha256: checksum.first.sha256 })
    end

    # Looks up the stored file for the first file identifier.
    def file
      ::Valkyrie::StorageAdapter.find_by(id: file_identifiers.first)
    end

    # @return [Array] the FileNode members of this node, found through a Wings
    #   query service on the configured metadata adapter
    def versions
      Wings::Valkyrie::QueryService
        .new(adapter: ::Valkyrie.config.metadata_adapter)
        .find_members(resource: self, model: Wings::FileNode)
        .to_a
    end

    private

    # @param [Object] term a PCDM use term
    # @return [Boolean] whether +use+ includes the given term
    def used_as?(term)
      use.include?(term)
    end
  end
end
18 changes: 18 additions & 0 deletions lib/wings/models/multi_checksum.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# frozen_string_literal: true

module Wings
  # Valkyrie resource holding the sha256, md5 and sha1 digests of one file.
  class MultiChecksum < ::Valkyrie::Resource
    attribute :sha256, ::Valkyrie::Types::SingleValuedString
    attribute :md5, ::Valkyrie::Types::SingleValuedString
    attribute :sha1, ::Valkyrie::Types::SingleValuedString

    # Builds a MultiChecksum from the digests computed by +file_object+.
    # @param [#checksum] file_object must accept +digests:+ and return the
    #   results in the order the digest objects were given
    # @return [Wings::MultiChecksum]
    def self.for(file_object)
      algorithms = [::Digest::MD5.new, ::Digest::SHA256.new, ::Digest::SHA1.new]
      digests = file_object.checksum(digests: algorithms)
      # Results are consumed in the same order the algorithms were requested.
      md5, sha256, sha1 = digests.shift(3)
      MultiChecksum.new(
        md5: md5,
        sha256: sha256,
        sha1: sha1
      )
    end
  end
end
Loading

0 comments on commit 4c18544

Please sign in to comment.