Skip to content

Commit

Permalink
Merge pull request #4243 from samvera/4195-allow-url-import-job-to-re…
Browse files Browse the repository at this point in the history
…ceive-valkyrie-or-activefedora-resource

Refactoring for hopefully making 4195 easier
  • Loading branch information
jcoyne authored Feb 1, 2020
2 parents 7ce7360 + bfd316e commit 3429d14
Showing 1 changed file with 46 additions and 36 deletions.
82 changes: 46 additions & 36 deletions app/jobs/import_url_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# and CreateWithRemoteFilesActor when files are located in some other service.
class ImportUrlJob < Hyrax::ApplicationJob
queue_as Hyrax.config.ingest_queue_name
attr_reader :file_set, :operation
attr_reader :file_set, :operation, :headers, :user, :uri

before_enqueue do |job|
operation = job.arguments[1]
Expand All @@ -17,51 +17,65 @@ class ImportUrlJob < Hyrax::ApplicationJob

# @param [FileSet] file_set
# @param [Hyrax::BatchCreateOperation] operation
def perform(file_set, operation, headers = {})
operation.performing!
user = User.find_by_user_key(file_set.depositor)
uri = URI(file_set.import_url)
name = file_set.label

# @param [Hash] headers - header data to use in interaction with remote url
# @param [Boolean] use_valkyrie - a switch on whether or not to use Valkyrie processing
#
# @todo At present, this job works for ActiveFedora objects. The use_valkyrie branch is not yet implemented.
def perform(file_set, operation, headers = {}, use_valkyrie: false)
@file_set = file_set
@operation = operation
@headers = headers
operation.performing!
@user = User.find_by_user_key(file_set.depositor)
@uri = URI(file_set.import_url)

unless BrowseEverything::Retriever.can_retrieve?(uri, headers)
send_error('Expired URL')
return false
end

# @todo Use Hydra::Works::AddExternalFileToFileSet instead of manually
# copying the file here. This will be gnarly.
copy_remote_file(uri, name, headers) do |f|
# reload the FileSet once the data is copied since this is a long running task
file_set.reload
return false unless can_retrieve_remote?

# FileSetActor operates synchronously so that this tempfile is available.
# If asynchronous, the job might be invoked on a machine that did not have this temp file on its file system!
# NOTE: The return status may be successful even if the content never attaches.
log_import_status(uri, f, user)
if use_valkyrie
# TODO
else
perform_af
end
end

private

# Checks whether the remote resource can be fetched with the current headers.
# Asks BrowseEverything::Retriever.can_retrieve? about `uri` and `headers`;
# when the answer is falsy, reports an 'Expired URL' error through send_error.
# @return [Boolean] true when the retriever accepts the URL, false otherwise
def can_retrieve_remote?
  unless BrowseEverything::Retriever.can_retrieve?(uri, headers)
    send_error('Expired URL')
    return false
  end

  true
end

# ActiveFedora import path: copies the remote file to a temporary file named
# after the FileSet's label, then hands that file to log_import_status.
#
# @todo Use Hydra::Works::AddExternalFileToFileSet instead of manually
# copying the file here. This will be gnarly.
def perform_af
  copy_remote_file(file_set.label) do |tempfile|
    # Copying is a long-running task, so refresh the FileSet once the data has arrived.
    file_set.reload

    # FileSetActor operates synchronously so that this tempfile is still available;
    # if it ran asynchronously, the job might be picked up on a machine whose
    # file system does not have this temp file.
    # NOTE: The return status may be successful even if the content never attaches.
    log_import_status(tempfile)
  end
end

# Download file from uri, yields a block with a file in a temporary directory.
# It is important that the file on disk has the same file name as the URL,
# because when the file is added into Fedora the file name will get persisted in the
# metadata.
# @param uri [URI] the uri of the file to download
# @param name [String] the human-readable name of the file
# @param headers [Hash] the HTTP headers for the GET request (these may contain an authentication token)
# @yield [IO] the stream to write to
def copy_remote_file(uri, name, headers = {})
def copy_remote_file(name)
filename = File.basename(name)
dir = Dir.mktmpdir
Rails.logger.debug("ImportUrlJob: Copying <#{uri}> to #{dir}")

File.open(File.join(dir, filename), 'wb') do |f|
begin
write_file(uri, f, headers)
write_file(f)
yield f
rescue StandardError => e
send_error(e.message)
Expand All @@ -74,16 +88,14 @@ def copy_remote_file(uri, name, headers = {})
# @param filename [String] the filename of the file to download
# @param error_message [String] the download error message
def send_error(error_message)
user = User.find_by_user_key(file_set.depositor)
@file_set.errors.add('Error:', error_message)
Hyrax.config.callback.run(:after_import_url_failure, @file_set, user, warn: false)
@operation.fail!(@file_set.errors.full_messages.join(' '))
file_set.errors.add('Error:', error_message)
Hyrax.config.callback.run(:after_import_url_failure, file_set, user, warn: false)
operation.fail!(file_set.errors.full_messages.join(' '))
end

# Write file to the stream
# @param uri [URI] the uri of the file to download
# @param f [IO] the stream to write to
def write_file(uri, f, headers)
def write_file(f)
retriever = BrowseEverything::Retriever.new
uri_spec = ActiveSupport::HashWithIndifferentAccess.new(url: uri, headers: headers)
retriever.retrieve(uri_spec) do |chunk|
Expand All @@ -93,14 +105,12 @@ def write_file(uri, f, headers)
end

# Set the import operation status
# @param uri [URI] the uri of the file to download
# @param f [IO] the stream to write to
# @param user [User]
def log_import_status(uri, f, user)
if Hyrax::Actors::FileSetActor.new(@file_set, user).create_content(f, from_url: true)
def log_import_status(f)
if Hyrax::Actors::FileSetActor.new(file_set, user).create_content(f, from_url: true)
operation.success!
else
send_error(uri.path, nil)
send_error(uri.path)
end
end
end

0 comments on commit 3429d14

Please sign in to comment.