Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring for hopefully making 4195 easier #4243

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 46 additions & 36 deletions app/jobs/import_url_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# and CreateWithRemoteFilesActor when files are located in some other service.
class ImportUrlJob < Hyrax::ApplicationJob
queue_as Hyrax.config.ingest_queue_name
attr_reader :file_set, :operation
attr_reader :file_set, :operation, :headers, :user, :uri

before_enqueue do |job|
operation = job.arguments[1]
Expand All @@ -17,51 +17,65 @@ class ImportUrlJob < Hyrax::ApplicationJob

# @param [FileSet] file_set
# @param [Hyrax::BatchCreateOperation] operation
def perform(file_set, operation, headers = {})
operation.performing!
user = User.find_by_user_key(file_set.depositor)
uri = URI(file_set.import_url)
name = file_set.label

# @param [Hash] headers - header data to use in interaction with remote url
# @param [Boolean] use_valkyrie - a switch on whether or not to use Valkyrie processing
#
# @todo At present, this job works for ActiveFedora objects. The use_valkyrie is not complete.
def perform(file_set, operation, headers = {}, use_valkyrie: false)
@file_set = file_set
@operation = operation
@headers = headers
operation.performing!
@user = User.find_by_user_key(file_set.depositor)
@uri = URI(file_set.import_url)

unless BrowseEverything::Retriever.can_retrieve?(uri, headers)
send_error('Expired URL')
return false
end

# @todo Use Hydra::Works::AddExternalFileToFileSet instead of manually
# copying the file here. This will be gnarly.
copy_remote_file(uri, name, headers) do |f|
# reload the FileSet once the data is copied since this is a long running task
file_set.reload
return false unless can_retrieve_remote?

# FileSetActor operates synchronously so that this tempfile is available.
# If asynchronous, the job might be invoked on a machine that did not have this temp file on its file system!
# NOTE: The return status may be successful even if the content never attaches.
log_import_status(uri, f, user)
if use_valkyrie
# TODO
else
perform_af
end
end

private

# Asks BrowseEverything::Retriever whether the remote resource at +uri+ can be
# retrieved with the configured +headers+. When it cannot, the failure is
# reported through #send_error with the message 'Expired URL'.
# @return [Boolean] true when the remote file is retrievable, false otherwise
def can_retrieve_remote?
  unless BrowseEverything::Retriever.can_retrieve?(uri, headers)
    # Report the failure before signalling the caller to stop.
    send_error('Expired URL')
    return false
  end

  true
end

# Import path used by #perform when +use_valkyrie+ is false: copies the remote
# file to a local temporary file named after the FileSet's label, then hands
# that file to #log_import_status.
#
# @todo Use Hydra::Works::AddExternalFileToFileSet instead of manually
#   copying the file here. This will be gnarly.
def perform_af
  copy_remote_file(file_set.label) do |downloaded|
    # Copying is a long running task, so refresh the FileSet once the data
    # has arrived.
    file_set.reload

    # FileSetActor operates synchronously so that this tempfile is available.
    # If asynchronous, the job might be invoked on a machine that did not
    # have this temp file on its file system!
    # NOTE: The return status may be successful even if the content never attaches.
    log_import_status(downloaded)
  end
end

# Download file from uri, yields a block with a file in a temporary directory.
# It is important that the file on disk has the same file name as the URL,
# because when the file is added into Fedora the file name will get persisted in the
# metadata.
# @param uri [URI] the uri of the file to download
# @param name [String] the human-readable name of the file
# @param headers [Hash] the HTTP headers for the GET request (these may contain an authentication token)
# @yield [IO] the stream to write to
def copy_remote_file(uri, name, headers = {})
def copy_remote_file(name)
filename = File.basename(name)
dir = Dir.mktmpdir
Rails.logger.debug("ImportUrlJob: Copying <#{uri}> to #{dir}")

File.open(File.join(dir, filename), 'wb') do |f|
begin
write_file(uri, f, headers)
write_file(f)
yield f
rescue StandardError => e
send_error(e.message)
Expand All @@ -74,16 +88,14 @@ def copy_remote_file(uri, name, headers = {})
# @param filename [String] the filename of the file to download
# @param error_message [String] the download error message
def send_error(error_message)
user = User.find_by_user_key(file_set.depositor)
@file_set.errors.add('Error:', error_message)
Hyrax.config.callback.run(:after_import_url_failure, @file_set, user, warn: false)
@operation.fail!(@file_set.errors.full_messages.join(' '))
file_set.errors.add('Error:', error_message)
Hyrax.config.callback.run(:after_import_url_failure, file_set, user, warn: false)
operation.fail!(file_set.errors.full_messages.join(' '))
end

# Write file to the stream
# @param uri [URI] the uri of the file to download
# @param f [IO] the stream to write to
def write_file(uri, f, headers)
def write_file(f)
retriever = BrowseEverything::Retriever.new
uri_spec = ActiveSupport::HashWithIndifferentAccess.new(url: uri, headers: headers)
retriever.retrieve(uri_spec) do |chunk|
Expand All @@ -93,14 +105,12 @@ def write_file(uri, f, headers)
end

# Set the import operation status
# @param uri [URI] the uri of the file to download
# @param f [IO] the stream to write to
# @param user [User]
def log_import_status(uri, f, user)
if Hyrax::Actors::FileSetActor.new(@file_set, user).create_content(f, from_url: true)
def log_import_status(f)
if Hyrax::Actors::FileSetActor.new(file_set, user).create_content(f, from_url: true)
operation.success!
else
send_error(uri.path, nil)
send_error(uri.path)
end
end
end