diff --git a/CHANGELOG.md b/CHANGELOG.md index fcaa798c4..3f9ecd2a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # changelog +## [2024.4] - 2024-03-26 + +### enhancements ✨ +- Refactored derivatives code to ensure PDFs are being OCR'd (and generally make the process a bit more legible) (#1010) + +### updates 📈 +- Bulkrax to 5.5.1 (#1113) + +### bug fixes 🐞 +- **All works / My works** dashboard navtabs work again (#1113) +- Removed `eduPersonEntitlement` values from cookied CAS attributes (#1115) + + ## [2024.3] - 2024-03-15 ### bug fixes 🐞 @@ -910,6 +923,7 @@ fixes: Initial pre-release (live on ldr.stage.lafayette.edu) +[2024.4]: https://github.com/LafayetteCollegeLibraries/spot/releases/tag/2024.4 [2024.3]: https://github.com/LafayetteCollegeLibraries/spot/releases/tag/2024.3 [2024.2]: https://github.com/LafayetteCollegeLibraries/spot/releases/tag/2024.2 [2024.1]: https://github.com/LafayetteCollegeLibraries/spot/releases/tag/2024.1 diff --git a/Gemfile b/Gemfile index 295245ee4..dff3ef7d3 100644 --- a/Gemfile +++ b/Gemfile @@ -53,7 +53,7 @@ gem 'bootsnap', '~> 1.17', require: false # Bulkrax for batch ingesting objects gem 'browse-everything', '~> 1.1.2' -gem 'bulkrax', '~> 5.3.0' +gem 'bulkrax', '~> 5.5.1' # This needs to be here if we want to compile our own JS # (there's like a single coffee-script file still remaining in hyrax) @@ -138,6 +138,10 @@ gem 'sprockets-es6', '~> 0.9.2' # @todo remove when Hyrax 3.5.1 or 3.6 (whichever includes it) drops gem 'redlock', '>= 0.1.2', '< 2.0' +# This is locked in hydra_editor > v6 to prevent an update +# that throws off how forms are built in Hyrax. +gem 'simple_form', '< 5.2' + # development dependencies (not as necessary to lock down versions here) group :development do # Seed data diff --git a/Gemfile.lock b/Gemfile.lock index 3f9640f1e..d0bd9ea55 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -111,7 +111,7 @@ GEM babel-transpiler (0.7.0) babel-source (>= 4.0, < 6) execjs (~> 2.0) - bagit (0.4.5) + bagit (0.4.6) docopt (~> 0.5.0) validatable (~> 1.6) bcp47 (0.3.3) @@ -176,10 +176,9 @@ GEM signet (~> 0.8) typhoeus builder (3.2.4) - bulkrax (5.3.0) + bulkrax (5.5.1) bagit (~> 0.4) coderay - dry-monads (~> 1.4.0) iso8601 (~> 0.9.0) kaminari language_list (~> 1.2, >= 1.2.1) @@ -219,7 +218,7 @@ GEM coffee-script-source execjs coffee-script-source (1.12.2) - concurrent-ruby (1.2.2) + concurrent-ruby (1.2.3) connection_pool (2.4.1) crack (0.4.5) rexml @@ -230,7 +229,7 @@ GEM activerecord (>= 5.a) database_cleaner-core (~> 2.0.0) database_cleaner-core (2.0.1) - date (3.3.3) + date (3.3.4) declarative (0.0.20) declarative-builder (0.1.0) declarative-option (< 0.2.0) @@ -530,7 +529,7 @@ GEM valkyrie (~> 2, >= 2.1.1) hyrax-spec (0.3.2) rspec (~> 3.6) - i18n (1.14.1) + i18n (1.14.4) concurrent-ruby (~> 1.0) ice_nine (0.11.2) iiif_manifest (1.3.1) @@ -652,7 +651,7 @@ GEM mailboxer (0.15.1) carrierwave (>= 0.5.8) rails (>= 5.0.0) - marcel (1.0.2) + marcel (1.0.4) matrix (0.4.2) memoist (0.16.2) method_source (1.0.0) @@ -660,37 +659,35 @@ GEM mime-types-data (~> 3.2015) mime-types-data (3.2023.0218.1) mini_magick (4.12.0) - mini_mime (1.1.2) + mini_mime (1.1.5) mini_portile2 (2.8.5) - minitest (5.18.1) + minitest (5.22.3) msgpack (1.7.1) multi_json (1.15.0) multi_xml (0.6.0) - multipart-post (2.3.0) + multipart-post (2.4.0) namae (1.1.1) nest (3.2.0) redic net-http-persistent (4.0.2) connection_pool (~> 2.2) - net-imap (0.3.6) + net-imap (0.4.10) date net-protocol net-pop (0.1.2) net-protocol - net-protocol (0.2.1) + net-protocol (0.2.2) timeout - net-smtp (0.3.3) + net-smtp (0.4.0.1) net-protocol - nio4r (2.5.9) + nio4r (2.7.0) noid (0.9.0) noid-rails (3.0.3) actionpack (>= 5.0.0, < 7) noid (~> 0.9) - nokogiri (1.15.3) + nokogiri (1.15.6) mini_portile2 (~> 2.8.2) racc (~> 1.4) - nokogiri (1.15.3-arm64-darwin) - racc (~> 1.4) non-digest-assets (2.2.0) activesupport (>= 5.2, < 7.1) sprockets (>= 2.0, < 5.0) @@ -738,7 +735,7 @@ GEM rdf raabro (1.4.0) racc (1.7.3) - rack (2.2.8) + rack (2.2.8.1) rack-cas (0.16.1) addressable (~> 2.3) nokogiri (~> 1.5) @@ -980,7 +977,7 @@ GEM faraday (>= 0.17.5, < 3.a) jwt (>= 1.5, < 3.0) multi_json (~> 1.10) - simple_form (5.2.0) + simple_form (5.1.0) actionpack (>= 5.2) activemodel (>= 5.2) simplecov (0.21.2) @@ -1036,10 +1033,10 @@ GEM temple (0.10.3) terminal-table (3.0.2) unicode-display_width (>= 1.1.1, < 3) - thor (1.3.0) + thor (1.3.1) thread_safe (0.3.6) tilt (2.3.0) - timeout (0.4.0) + timeout (0.4.1) tinymce-rails (5.10.7.1) railties (>= 3.1.1) turbolinks (5.2.1) @@ -1090,15 +1087,14 @@ GEM hashdiff (>= 0.4.0, < 2.0.0) webrick (1.8.1) websocket (1.2.10) - websocket-driver (0.7.5) + websocket-driver (0.7.6) websocket-extensions (>= 0.1.0) websocket-extensions (0.1.5) xpath (3.2.0) nokogiri (~> 1.8) - zeitwerk (2.6.8) + zeitwerk (2.6.13) PLATFORMS - arm64-darwin-22 ruby DEPENDENCIES @@ -1112,7 +1108,7 @@ DEPENDENCIES blacklight_range_limit (~> 6.3.3) bootsnap (~> 1.17) browse-everything (~> 1.1.2) - bulkrax (~> 5.3.0) + bulkrax (~> 5.5.1) byebug (~> 11.1.3) capybara (~> 3.38) capybara-screenshot (~> 1.0.26) @@ -1157,6 +1153,7 @@ DEPENDENCIES shoulda-matchers (~> 4) sidekiq (~> 5.2.9) sidekiq-cron (~> 1.9.1) + simple_form (< 5.2) simplecov (~> 0.21.2) slack-ruby-client (~> 0.14.6) spring (~> 2.1.1) diff --git a/app/services/spot/derivatives/base_derivative_service.rb b/app/services/spot/derivatives/base_derivative_service.rb new file mode 100644 index 000000000..0494520a9 --- /dev/null +++ b/app/services/spot/derivatives/base_derivative_service.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true +module Spot + module Derivatives + # Base class that other derivative services can inherit from + class BaseDerivativeService < ::Hyrax::DerivativeService + delegate :audio_mime_types, + :image_mime_types, + :pdf_mime_types, + :office_document_mime_types, + :video_mime_types, + to: :FileSet + end + end +end diff --git a/app/services/spot/derivatives/base_derivatives_service.rb b/app/services/spot/derivatives/base_derivatives_service.rb deleted file mode 100644 index 3744ab21c..000000000 --- a/app/services/spot/derivatives/base_derivatives_service.rb +++ /dev/null @@ -1,21 +0,0 @@ -# frozen_string_literal: true -module Spot - module Derivatives - # Abstract class that other derivative services can inherit from - class BaseDerivativesService - attr_reader :file_set - - def initialize(file_set) - @file_set = file_set - end - - def cleanup_derivatives - raise NotImplementedError, '#create_derivatives should be implemented by a child class of BaseDerivativesService' - end - - def create_derivatives(_filename) - raise NotImplementedError, '#create_derivatives should be implemented by a child class of BaseDerivativesService' - end - end - end -end diff --git a/app/services/spot/derivatives/access_master_service.rb b/app/services/spot/derivatives/iiif_access_copy_service.rb similarity index 50% rename from app/services/spot/derivatives/access_master_service.rb rename to app/services/spot/derivatives/iiif_access_copy_service.rb index df508f387..704b01329 100644 --- a/app/services/spot/derivatives/access_master_service.rb +++ b/app/services/spot/derivatives/iiif_access_copy_service.rb @@ -5,46 +5,30 @@ module Spot module Derivatives - # Creates access_master derivatives for all image-based works. - # Intended to be run as part of a subset within {Spot::ImageDerivativesService} - # and needs to respond to :cleanup_derivatives and :create_derivatives (the - # latter receives a source filename as a parameter). + # Creates pyramidal TIFF copies of Images for serving via IIIF. Pyramidal TIFFs contain + # layers at different resolutions which makes their use in a deep-zooming IIIF application + # (ie. UniversalViewer) more efficient. # - # When the AWS_IIIF_ASSET_BUCKET environment variable is present, this will - # write the file to a defined S3 bucket and remove the local copy. + # This generates the file locally and then uploads to an S3 bucket defined by the + # AWS_IIIF_ASSET_BUCKET environment variable. The local copy is deleted afterwards. # - # @example usage - # file_set = FileSet.find(id: 'abc123def') - # src_path = Rails.root.join('tmp', 'uploads', more_path, 'original-file.tif') - # Spot::Derivatives::AccessMasterService.new(file_set).create_derivatives(src_path) - class AccessMasterService < BaseDerivativesService - # Determines which cleanup method to use based on whether or not AWS related - # variables are present in ENV - # - # @return [void] - def cleanup_derivatives - use_s3? ? cleanup_s3_derivatives : cleanup_local_derivatives - end - - # Deletes the local access_master derivative if it exists - # - # @return [void] - def cleanup_local_derivatives - FileUtils.rm_f(derivative_path) if File.exist?(derivative_path) - end + # These derivatives are created for an FileSets that include Image mime_types. + # + # @see https://www.loc.gov/preservation/digital/formats/fdd/fdd000237.shtml + class IiifAccessCopyService < BaseDerivativeService + class_attribute :derivative_key_template + self.derivative_key_template = '%s-access.tif' # Deletes the derivative from the S3 bucket # @todo maybe we should hang onto these when we delete + put them in a glacier grave? # @return [void] # @see https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#delete_object-instance_method - def cleanup_s3_derivatives + def cleanup_derivatives s3_client.delete_object(bucket: s3_bucket, key: s3_derivative_key) end - # Since we want to pass some extended options to the creation process, - # we'll use MiniMagick directly instead of Hydra::Derviatives::ImageDerivatives. - # If AWS ENV variables are present, this will upload the generated file - # to an S3 bucket and then delete the local copy. + # Generates a pyramidal TIFF using ImageMagick (via MiniMagick gem) + # and uploads it to the S3 bucket. # # @param [String,Pathname] filename the src path of the file # @return [void] @@ -52,16 +36,20 @@ def create_derivatives(filename) output_dirname = File.dirname(derivative_path) FileUtils.mkdir_p(output_dirname) unless File.directory?(output_dirname) - MiniMagick::Tool::Convert.new do |magick| - magick << "#{filename}[0]" - # we need to use an array for each piece of this command; using a string will cause an error - magick.merge! %w[-define tiff:tile-geometry=128x128 -compress jpeg] - magick << "ptif:#{derivative_path}" + MiniMagick::Tool::Convert.new do |convert| + convert.merge!( + [ + "#{filename}[0]", + "-define", "tiff:tile-geometry=128x128", + "-compress", "jpeg", + "ptif:#{derivative_path}" + ] + ) end - return unless use_s3? + upload_derivative_to_s3 - upload_derivative_to_s3 && cleanup_local_derivatives + FileUtils.rm_f(derivative_path) if File.exist?(derivative_path) end # copied from https://github.com/samvera/hyrax/blob/5a9d1be1/app/services/hyrax/file_set_derivatives_service.rb#L32-L37 @@ -73,18 +61,30 @@ def derivative_path Hyrax::DerivativePath.derivative_path_for_reference(file_set, 'access.tif').to_s.gsub(/\.access\.tif$/, '') end + # Only create pyramidal TIFFs if the source mime_type is an Image and if we defined + def valid? + if s3_bucket.blank? + Rails.logger.warn('Skipping IIIF Access Copy generation because the AWS_IIIF_ASSET_BUCKET environment variable is not defined.') + return false + end + + image_mime_types.include?(mime_type) + end + private def s3_bucket ENV['AWS_IIIF_ASSET_BUCKET'] end + # We're using AWS credentials stored within the App/Sidekiq services for authentication, + # so the Aws::S3::Client will pick them up ambiently. def s3_client @s3_client ||= Aws::S3::Client.new end def s3_derivative_key - "#{file_set.id}-access.tif" + derivative_key_template % file_set.id end def upload_derivative_to_s3 @@ -100,10 +100,6 @@ def upload_derivative_to_s3 } ) end - - def use_s3? - s3_bucket.present? - end end end end diff --git a/app/services/spot/derivatives/text_extraction_service.rb b/app/services/spot/derivatives/text_extraction_service.rb new file mode 100644 index 000000000..29fe92f91 --- /dev/null +++ b/app/services/spot/derivatives/text_extraction_service.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true +module Spot + module Derivatives + # Text extraction, ahem, extracted from Hyrax::FileSetDerivativesService. + # Currently only being run for PDFs, but may be expanded to include + # images if possible. This attaches a file to the file_set's with a type of + # :extracted_text. + # + # @see https://github.com/samvera/hydra-derivatives/blob/v3.6.0/lib/hydra/derivatives/processors/full_text.rb + # @see https://github.com/samvera/hydra-works/blob/v1.2.0/lib/hydra/works/services/add_file_to_file_set.rb + # + # @todo should we change the text extraction from hydra-derivatives' use + # of Solr's libraries to another service (Tesseract, AWS service?) + # @todo revisit when we upgrade to Hyrax 3 + # + class TextExtractionService < BaseDerivativeService + # Extracted text is attached to the FileSet, which is deleted when the work is. No muss no fuss! + # + # @return [void] + def cleanup_derivatives; end + + # @param [String] src_path + # @return [void] + def create_derivatives(src_path) + return unless Hyrax.config.extract_full_text? + Hydra::Derivatives::FullTextExtract.create(src_path, + outputs: [{ url: uri, container: "extracted_text" }]) + end + + # Only running text extraction on PDFs for the time being + # + # @return [true, false] + def valid? + pdf_mime_types.include? mime_type + end + + # Since the newer Hyrax method is backwards-compatible, let's use that instead of delegating to file_set + # + # @see https://github.com/samvera/hyrax/blob/hyrax-v3.5.0/app/services/hyrax/file_set_derivatives_service.rb#L13-L20 + def uri + # If given a FileMetadata object, use its parent ID. + if file_set.respond_to?(:file_set_id) + file_set.file_set_id.to_s + else + file_set.uri + end + end + end + end +end diff --git a/app/services/spot/derivatives/thumbnail_service.rb b/app/services/spot/derivatives/thumbnail_service.rb index 96cbc4759..a579a0c5c 100644 --- a/app/services/spot/derivatives/thumbnail_service.rb +++ b/app/services/spot/derivatives/thumbnail_service.rb @@ -1,11 +1,12 @@ # frozen_string_literal: true module Spot module Derivatives - # Thumbnail creation abstracted out from +Hyrax::FileSetDerivativesServices+. - # Intended to be run as part of a subset within {Spot::ImageDerivativesService} - # and needs to respond to :cleanup_derivatives and :create_derivatives (the - # latter receives a source filename as a parameter). - class ThumbnailService < BaseDerivativesService + # Thumbnail creation for file_sets (except audio mime-types). + # + # @example + # working_copy = Hyrax::WorkingDirectory.find_or_retrieve(file_set) + # Spot::Derivatives::ThumbnailServkce.new(file_set).create_derivatives(working_copy) + class ThumbnailService < BaseDerivativeService # @return [void] def cleanup_derivatives FileUtils.rm_f(derivative_path) if File.exist?(derivative_path) @@ -18,17 +19,31 @@ def cleanup_derivatives # @param [String, Pathname] filename # @return [void] # @see https://github.com/LafayetteCollegeLibraries/spot/issues/831 - def create_derivatives(filename) + def create_derivatives(src_path) output_dirname = File.dirname(derivative_path) FileUtils.mkdir_p(output_dirname) unless File.directory?(output_dirname) MiniMagick::Tool::Convert.new do |convert| - convert << "#{filename}[0]" - convert.merge! %w[-colorspace sRGB -flatten -resize 200x150> -format jpg] - convert << derivative_path + convert.merge!( + [ + "#{src_path}[0]", + "-colorspace", "sRGB", + "-flatten", + "-resize", "200x150>", + "-format", "jpg", + derivative_path + ] + ) end end + # Audio formats are the only ones we can't create a thumbnail for + # + # @return [true, false] + def valid? + !audio_mime_types.include?(mime_type) + end + private def derivative_path diff --git a/app/services/spot/file_set_derivatives_service.rb b/app/services/spot/file_set_derivatives_service.rb new file mode 100644 index 000000000..ab4930413 --- /dev/null +++ b/app/services/spot/file_set_derivatives_service.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true +module Spot + # Hacking the derivatives creation process to be more of a pipeline rather than per mime-type. + # Individual service classes are stored within the `.derivative_services` class_attribute, and + # when Spot::FileSetDerivativesService#create_derivatives or `#cleanup_derivatives`` is invoked, + # each sub-service receives the same call (if it responds true to `#valid?`). + class FileSetDerivativesService < ::Hyrax::DerivativeService + class_attribute :derivative_services, :supported_mime_types + self.derivative_services = [ + ::Spot::Derivatives::ThumbnailService, + ::Spot::Derivatives::IiifAccessCopyService, + ::Spot::Derivatives::TextExtractionService + ] + + def cleanup_derivatives + services.each do |service| + service.cleanup_derivatives if service.valid? + end + end + + def create_derivatives(working_copy_src) + services.each do |service| + service.create_derivatives(working_copy_src) if service.valid? + end + end + + def valid? + services.any?(&:valid?) + end + + private + + def services + derivative_services.map { |service| service.new(file_set) } + end + end +end diff --git a/app/services/spot/image_derivatives_service.rb b/app/services/spot/image_derivatives_service.rb deleted file mode 100644 index 1c07379dd..000000000 --- a/app/services/spot/image_derivatives_service.rb +++ /dev/null @@ -1,63 +0,0 @@ -# frozen_string_literal: true -module Spot - # Replacement for the +Hyrax::DerivativesService+ that processes only - # image mime-typed items. As of the initial implementation, it'll - # a) create a thumbnail, and b) create an access_master tif. - # - # In an initializer, add this class to the +Hyrax::DerivativeService.services+ array - # - # @example adding the service to the list of services - # # config/initializers/hydra-overrides.rb - # Hyrax::DerivativeService.services = [ - # Spot::ImageDerivativesService, - # Hyrax::FileSetDerivativesService - # ] - class ImageDerivativesService - class_attribute :services - self.services = [ - ::Spot::Derivatives::ThumbnailService, - ::Spot::Derivatives::AccessMasterService - ] - - attr_reader :file_set - delegate :uri, :mime_type, to: :file_set - - def initialize(file_set) - @file_set = file_set - end - - # Delegates cleanup to each of our services - # - # @return [void] - def cleanup_derivatives - mapped_services.each do |service| - service.cleanup_derivatives if service.respond_to?(:cleanup_derivatives) - end - end - - # Iterates through the the provided services and runs their +#create_derivatives+ - # method to allow us to do more than one thing with an image type - # - # @param [String, Pathname] filename - # @return [void] - def create_derivatives(filename) - mapped_services.each do |service| - service.create_derivatives(filename) if service.respond_to?(:create_derivatives) - end - end - - # Does the file_set we're processing have an image-esque mime-type? - # - # @return [true, false] - def valid? - file_set.class.image_mime_types.include?(mime_type) - end - - private - - # @return [Array] - def mapped_services - @mapped_services ||= services.map { |service| service.new(file_set) } - end - end -end diff --git a/app/services/spot/pdf_derivatives_service.rb b/app/services/spot/pdf_derivatives_service.rb deleted file mode 100644 index 26f34f71e..000000000 --- a/app/services/spot/pdf_derivatives_service.rb +++ /dev/null @@ -1,48 +0,0 @@ -# frozen_string_literal: true -module Spot - class PdfDerivativesService - class_attribute :services - self.services = [::Spot::Derivatives::ThumbnailService] - - attr_reader :file_set - delegate :uri, :mime_type, to: :file_set - - def initialize(file_set) - @file_set = file_set - end - - # Delegates cleanup to each of our services - # - # @return [void] - def cleanup_derivatives - mapped_services.each do |service| - service.cleanup_derivatives if service.respond_to?(:cleanup_derivatives) - end - end - - # Iterates through the the provided services and runs their +#create_derivatives+ - # method to allow us to do more than one thing with an image type - # - # @param [String, Pathname] filename - # @return [void] - def create_derivatives(filename) - mapped_services.each do |service| - service.create_derivatives(filename) if service.respond_to?(:create_derivatives) - end - end - - # Does the file_set we're processing have an image-esque mime-type? - # - # @return [true, false] - def valid? - file_set.class.pdf_mime_types.include?(mime_type) - end - - private - - # @return [Array] - def mapped_services - @mapped_services ||= services.map { |service| service.new(file_set) } - end - end -end diff --git a/config/application.rb b/config/application.rb index 4871812a6..e2d7b8882 100644 --- a/config/application.rb +++ b/config/application.rb @@ -24,7 +24,7 @@ class Application < Rails::Application config.rack_cas.server_url = ENV['CAS_BASE_URL'] config.rack_cas.service = '/users/service' - config.rack_cas.extra_attributes_filter = %w[uid email givenName surname lnumber eduPersonEntitlement] + config.rack_cas.extra_attributes_filter = %w[uid email givenName surname lnumber] # Settings in config/environments/* take precedence over those specified here. # Application configuration should go into files in config/initializers diff --git a/config/initializers/spot_overrides.rb b/config/initializers/spot_overrides.rb index c9479047f..5bab6bf37 100644 --- a/config/initializers/spot_overrides.rb +++ b/config/initializers/spot_overrides.rb @@ -24,14 +24,16 @@ Hyrax::CollectionsController.presenter_class = Spot::CollectionPresenter Hyrax::CollectionsController.include Spot::CollectionsControllerBehavior + Hyrax::CurationConcern.actor_factory.swap(Hyrax::Actors::CollectionsMembershipActor, Spot::Actors::CollectionsMembershipActor) + + # Use our own FileSetDerivativesService first and fall back to the Hyrax services + # for formats we don't currently handle uniquely. Hyrax::DerivativeService.services = [ - ::Spot::PdfDerivativesService, - ::Spot::ImageDerivativesService, + ::Spot::FileSetDerivativesService, ::Hyrax::FileSetDerivativesService ] - Hyrax::CurationConcern.actor_factory.swap(Hyrax::Actors::CollectionsMembershipActor, Spot::Actors::CollectionsMembershipActor) - + # Change the layout used for pages and the contact form Hyrax::ContactFormController.class_eval { layout 'hyrax/1_column' } Hyrax::PagesController.class_eval { layout 'hyrax/1_column' } diff --git a/spec/services/spot/derivatives/access_master_service_spec.rb b/spec/services/spot/derivatives/access_master_service_spec.rb deleted file mode 100644 index 639ab3bc6..000000000 --- a/spec/services/spot/derivatives/access_master_service_spec.rb +++ /dev/null @@ -1,119 +0,0 @@ -# frozen_string_literal: true -RSpec.describe Spot::Derivatives::AccessMasterService do - subject(:service) { described_class.new(file_set) } - - let(:file_set) { build(:file_set, id: 'abc123def') } - let(:derivative_path) { '/rails/tmp/derivatives/ab/c1/23/de/f-access.tif' } - let(:src_path) { '/original/path/to/src/file.tif' } - let(:file_size) { 0 } - let(:file_digest) { 'base64digest' } - let(:stringio) { StringIO.new('hi') } - let(:mock_digest) { instance_double(Digest::MD5, base64digest: file_digest) } - - let(:magick_commands) do - [].tap do |arr| - arr.define_singleton_method(:merge!) do |args| - args.each { |arg| self << arg } - end - end - end - - let(:expected_magick_commands) do - ["#{src_path}[0]", '-define', 'tiff:tile-geometry=128x128', '-compress', 'jpeg', "ptif:#{derivative_path}"] - end - - # rubocop:disable RSpec/InstanceVariable - before do - @aws_iiif_asset_bucket = ENV['AWS_IIIF_ASSET_BUCKET'] - ENV.delete('AWS_IIIF_ASSET_BUCKET') - - allow(Hyrax::DerivativePath) - .to receive(:derivative_path_for_reference) - .with(file_set, 'access.tif') - .and_return("#{derivative_path}.access.tif") - - allow(File).to receive(:exist?).with(derivative_path).and_return true - allow(File).to receive(:open).with(derivative_path, 'r') - allow(File).to receive(:size).with(derivative_path).and_return(file_size) - allow(File).to receive(:directory?).with(File.dirname(derivative_path)).and_return(true) - allow(FileUtils).to receive(:rm_f).with(derivative_path) - - allow(MiniMagick::Tool::Convert).to receive(:new).and_yield(magick_commands) - allow(FileUtils).to receive(:rm_f).with(File.dirname(derivative_path)) - allow(File).to receive(:open).with(derivative_path, "r").and_return(stringio) - allow(Digest::MD5).to receive(:file).with(derivative_path).and_return(mock_digest) - - allow(file_set).to receive(:width).and_return(['150']) - allow(file_set).to receive(:height).and_return(['150']) - end - - after do - ENV['AWS_IIIF_ASSET_BUCKET'] = @aws_iiif_asset_bucket if @aws_iiif_asset_bucket.present? - end - # rubocop:enable RSpec/InstanceVariable - - describe '#cleanup_derivatives' do - it 'rimrafs the derivative_path provided' do - service.cleanup_derivatives - - expect(FileUtils).to have_received(:rm_f).with(derivative_path) - end - end - - describe '#create_derivatives' do - it 'sends `convert` commands to MiniMagick' do - service.create_derivatives(src_path) - - expect(magick_commands).to eq(expected_magick_commands) - end - end - - context 'when AWS environment variables are available' do - let(:aws_iiif_asset_bucket) { 'iiif-assets' } - let(:mock_s3_client) { instance_double(Aws::S3::Client, delete_object: {}, put_object: {}) } - let(:s3_key) { "#{file_set.id}-access.tif" } - - before do - stub_env('AWS_IIIF_ASSET_BUCKET', aws_iiif_asset_bucket) - - allow(Aws::S3::Client).to receive(:new).and_return(mock_s3_client) - end - - describe '#cleanup_derivatives' do - subject { service.cleanup_derivatives } - - it 'deletes the object from S3' do - service.cleanup_derivatives - - expect(mock_s3_client) - .to have_received(:delete_object) - .with(bucket: aws_iiif_asset_bucket, key: s3_key) - end - end - - describe '#create_derivatives' do - let(:fs_width) { file_set.width.first } - let(:fs_height) { file_set.height.first } - - it 'puts the object into S3' do - service.create_derivatives(derivative_path) - - expect(mock_s3_client) - .to have_received(:put_object) - .with( - bucket: aws_iiif_asset_bucket, - key: s3_key, - body: stringio, - content_md5: file_digest, - content_length: file_size, - metadata: { - 'width' => fs_width, - 'height' => fs_height - } - ) - - expect(FileUtils).to have_received(:rm_f).with(derivative_path) - end - end - end -end diff --git a/spec/services/spot/derivatives/base_derivatives_service_spec.rb b/spec/services/spot/derivatives/base_derivatives_service_spec.rb index e8fa7ad1e..2deeee803 100644 --- a/spec/services/spot/derivatives/base_derivatives_service_spec.rb +++ b/spec/services/spot/derivatives/base_derivatives_service_spec.rb @@ -1,18 +1,6 @@ # frozen_string_literal: true -RSpec.describe Spot::Derivatives::BaseDerivativesService do - subject(:service) { described_class.new(file_set) } +RSpec.describe Spot::Derivatives::BaseDerivativeService, derivatives: true do + let(:valid_file_set) { FileSet.new } - let(:file_set) { FileSet.new } - - describe '#cleanup_derivatives' do - it 'is not implemented' do - expect { service.cleanup_derivatives }.to raise_error(NotImplementedError) - end - end - - describe '#create_derivatives' do - it 'is not implemented' do - expect { service.create_derivatives('filename') }.to raise_error(NotImplementedError) - end - end + it_behaves_like 'a Hyrax::DerivativeService' end diff --git a/spec/services/spot/derivatives/iiif_access_copy_service_spec.rb b/spec/services/spot/derivatives/iiif_access_copy_service_spec.rb new file mode 100644 index 000000000..c7e88f74f --- /dev/null +++ b/spec/services/spot/derivatives/iiif_access_copy_service_spec.rb @@ -0,0 +1,122 @@ +# frozen_string_literal: true +RSpec.describe Spot::Derivatives::IiifAccessCopyService, derivatives: true do + subject(:service) { described_class.new(file_set) } + + let(:_file_set) { build(:file_set) } + let(:file_set) { _file_set } + let(:valid_file_set) { _file_set } + + let(:mock_file) { Hydra::PCDM::File.new } + let(:derivative_path) { '/rails/tmp/derivatives/ab/c1/23/de/f-access.tif' } + let(:src_path) { '/original/path/to/src/file.tif' } + let(:file_size) { 0 } + let(:file_digest) { 'base64digest' } + let(:stringio) { StringIO.new('hi') } + let(:mock_digest) { instance_double(Digest::MD5, base64digest: file_digest) } + + let(:magick_commands) do + [].tap do |arr| + arr.define_singleton_method(:merge!) do |args| + args.each { |arg| self << arg } + end + end + end + + # AWS environment (maybe this should be a shared_context?) + let(:aws_access_key_id) { 'AWS-access_key-id' } + let(:aws_secret_access_key) { 'AWS-secret-access_key' } + let(:aws_iiif_asset_bucket) { 'iiif-assets' } + let(:mock_s3_client) { instance_double(Aws::S3::Client, delete_object: {}, put_object: {}) } + let(:s3_key) { "#{file_set.id}-access.tif" } + + before do + stub_env('AWS_ACCESS_KEY_ID', aws_access_key_id) + stub_env('AWS_SECRET_ACCESS_KEY', aws_secret_access_key) + stub_env('AWS_IIIF_ASSET_BUCKET', aws_iiif_asset_bucket) + + allow(Aws::S3::Client).to receive(:new).and_return(mock_s3_client) + + allow(Hyrax::DerivativePath) + .to receive(:derivative_path_for_reference) + .with(file_set, 'access.tif') + .and_return("#{derivative_path}.access.tif") + + allow(File).to receive(:exist?).with(derivative_path).and_return true + allow(File).to receive(:open).with(derivative_path, 'r') + allow(File).to receive(:size).with(derivative_path).and_return(file_size) + allow(File).to receive(:directory?).with(File.dirname(derivative_path)).and_return(true) + allow(FileUtils).to receive(:rm_f).with(derivative_path) + + allow(MiniMagick::Tool::Convert).to receive(:new).and_yield(magick_commands) + allow(FileUtils).to receive(:rm_f).with(File.dirname(derivative_path)) + allow(File).to receive(:open).with(derivative_path, "r").and_return(stringio) + allow(Digest::MD5).to receive(:file).with(derivative_path).and_return(mock_digest) + + allow(_file_set).to receive(:width).and_return(['150']) + allow(_file_set).to receive(:height).and_return(['150']) + allow(_file_set).to receive(:mime_type).and_return('image/tiff') + end + + it_behaves_like 'a Hyrax::DerivativeService' + + describe '#cleanup_derivatives' do + before { service.cleanup_derivatives } + + it 'deletes the object from S3' do + expect(mock_s3_client) + .to have_received(:delete_object) + .with(bucket: aws_iiif_asset_bucket, key: s3_key) + end + end + + describe '#create_derivatives' do + before { service.create_derivatives(src_path) } + + let(:fs_width) { file_set.width.first } + let(:fs_height) { file_set.height.first } + let(:expected_magick_commands) do + ["#{src_path}[0]", '-define', 'tiff:tile-geometry=128x128', '-compress', 'jpeg', "ptif:#{derivative_path}"] + end + + it 'sends `convert` commands to MiniMagick' do + expect(magick_commands).to eq(expected_magick_commands) + end + + it 'rimrafs the derivative_path provided' do + expect(FileUtils).to have_received(:rm_f).with(derivative_path) + end + + it 'puts the object into S3' do + expect(mock_s3_client) + .to have_received(:put_object) + .with( + bucket: aws_iiif_asset_bucket, + key: s3_key, + body: stringio, + content_md5: file_digest, + content_length: file_size, + metadata: { + 'width' => fs_width, + 'height' => fs_height + } + ) + end + end + + describe '#valid?' do + subject { described_class.new(file_set).valid? } + + context 'when no S3 bucket name set in environment' do + let(:aws_iiif_asset_bucket) { nil } + + before do + allow(Rails.logger).to receive(:warn) + end + + it 'logs a warning and returns false' do + expect(described_class.new(file_set).valid?).to be false + expect(Rails.logger).to have_received(:warn).with(/AWS_IIIF_ASSET_BUCKET environment variable is not defined/) + end + end + end +end diff --git a/spec/services/spot/derivatives/text_extraction_service_spec.rb b/spec/services/spot/derivatives/text_extraction_service_spec.rb new file mode 100644 index 000000000..6b73a83b7 --- /dev/null +++ b/spec/services/spot/derivatives/text_extraction_service_spec.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true +RSpec.describe Spot::Derivatives::TextExtractionService, derivatives: true do + let(:service) { described_class.new(file_set) } + let(:file_set) { instance_double('FileSet', mime_type: fs_mime_type, uri: '') } + let(:fs_mime_type) { 'application/pdf' } + let(:fs_uri) { 'http://example.org/' } + let(:src_path) { '' } + + describe '#create_derivatives' do + before do + allow(Hyrax.config).to receive(:extract_full_text?).and_return(true) + allow(Hydra::Derivatives::FullTextExtract).to receive(:create) + end + + it 'calls Hydra::Derivatives::FullTextExtract' do + service.create_derivatives(src_path) + + expect(Hydra::Derivatives::FullTextExtract) + .to have_received(:create) + .with(src_path, outputs: [{ url: file_set.uri, container: 'extracted_text' }]) + end + + context 'when using a Valkyrized file_set' do + let(:file_set) { Struct.new(:mime_type, :file_set_id).new(fs_mime_type, file_set_id) } + let(:file_set_id) { fs_uri } + + it 'uses file_set.file_set_id.to_s' do + service.create_derivatives(src_path) + + expect(Hydra::Derivatives::FullTextExtract) + .to have_received(:create) + .with(src_path, outputs: [{ url: file_set_id, container: 'extracted_text' }]) + end + end + + context 'when disabled via Hyrax.config' do + before do + allow(Hyrax.config).to receive(:extract_full_text?).and_return(false) + end + + it 'does not call Hydra::Derivatives::FullTextExtract' do + service.create_derivatives(src_path) + + expect(Hydra::Derivatives::FullTextExtract).not_to have_received(:create) + end + end + end + + describe '#valid?' do + subject { service.valid? } + + # valid mime_types + ['application/pdf'].each do |mime_type| + context "when mime_type is #{mime_type}" do + let(:fs_mime_type) { mime_type } + + it { is_expected.to be true } + end + end + + # invalid mime_types + ['image/tiff', 'application/vnd.ms-excel', 'video/mpeg', 'audio/wav'].each do |mime_type| + context "when mime_type is #{mime_type}" do + let(:fs_mime_type) { mime_type } + + it { is_expected.to be false } + end + end + end +end diff --git a/spec/services/spot/derivatives/thumbnail_service_spec.rb b/spec/services/spot/derivatives/thumbnail_service_spec.rb index a19fe8f2e..703d86f41 100644 --- a/spec/services/spot/derivatives/thumbnail_service_spec.rb +++ b/spec/services/spot/derivatives/thumbnail_service_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true -RSpec.describe Spot::Derivatives::ThumbnailService do +RSpec.describe Spot::Derivatives::ThumbnailService, derivatives: true do subject(:service) { described_class.new(file_set) } let(:file_set) { FileSet.new } @@ -58,4 +58,28 @@ expect(magick_commands).to eq(expected_commands) end end + + describe '#valid?' do + subject { service.valid? } + + before { allow(file_set).to receive(:mime_type).and_return(fs_mime_type) } + + # valid mime_types + ['image/tiff', 'application/vnd.ms-excel', 'video/mpeg', 'application/pdf'].each do |mime_type| + context "when mime_type is #{mime_type}" do + let(:fs_mime_type) { mime_type } + + it { is_expected.to be true } + end + end + + # invalid mime_types + ['audio/wav'].each do |mime_type| + context "when mime_type is #{mime_type}" do + let(:fs_mime_type) { mime_type } + + it { is_expected.to be false } + end + end + end end diff --git a/spec/services/spot/file_set_derivatives_service_spec.rb b/spec/services/spot/file_set_derivatives_service_spec.rb new file mode 100644 index 000000000..2b8efa738 --- /dev/null +++ b/spec/services/spot/file_set_derivatives_service_spec.rb @@ -0,0 +1,112 @@ +# frozen_string_literal: true +RSpec.describe Spot::FileSetDerivativesService, derivatives: true do + let(:service) { described_class.new(file_set) } + let(:file_set) { build(:file_set) } + let(:fs_mime_type) { 'image/tiff' } + let(:derivative_service_1) { instance_double('Hyrax::DerivativeService') } + let(:derivative_service_2) { instance_double('Hyrax::DerivativeService') } + let(:service_1_valid) { false } + let(:service_2_valid) { false } + let(:services) do + [ + class_double('Hyrax::DerivativeService', new: derivative_service_1), + class_double('Hyrax::DerivativeService', new: derivative_service_2) + ] + end + + # rubocop:disable RSpec/InstanceVariable + before do + allow(file_set).to receive(:mime_type).and_return(fs_mime_type) + allow(derivative_service_1).to receive(:valid?).and_return(service_1_valid) + allow(derivative_service_1).to receive(:cleanup_derivatives) + allow(derivative_service_1).to receive(:create_derivatives) + allow(derivative_service_2).to receive(:valid?).and_return(service_2_valid) + allow(derivative_service_2).to receive(:cleanup_derivatives) + allow(derivative_service_2).to receive(:create_derivatives) + + @original_services = described_class.derivative_services + described_class.derivative_services = services + end + + after do + described_class.derivative_services = @original_services + end + # rubocop:enable RSpec/InstanceVariable + + it_behaves_like 'a Hyrax::DerivativeService' do + let(:valid_file_set) { build(:file_set) } + let(:service_1_valid) { true } # required for service.valid? + end + + describe '#cleanup_derivatives' do + context 'when all services are valid' do + let(:service_1_valid) { true } + let(:service_2_valid) { true } + + it 'calls #cleanup_derivatives on each' do + service.cleanup_derivatives + + expect(derivative_service_1).to have_received(:cleanup_derivatives) + expect(derivative_service_2).to have_received(:cleanup_derivatives) + end + end + + context 'when not all services are valid' do + let(:service_1_valid) { true } + let(:service_2_valid) { false } + + it 'calls #cleanup_derivatives on each' do + service.cleanup_derivatives + + expect(derivative_service_1).to have_received(:cleanup_derivatives) + expect(derivative_service_2).not_to have_received(:cleanup_derivatives) + end + end + end + + describe '#create_derivatives' do + let(:src_path) { '/path/to/file.png' } + + context 'when all services are valid' do + let(:service_1_valid) { true } + let(:service_2_valid) { true } + + it 'calls #create_derivatives on each' do + service.create_derivatives(src_path) + + expect(derivative_service_1).to have_received(:create_derivatives).with(src_path) + expect(derivative_service_2).to have_received(:create_derivatives).with(src_path) + end + end + + context 'when not all services are valid' do + let(:service_1_valid) { true } + let(:service_2_valid) { false } + + it 'calls #cleanup_derivatives on each' do + service.create_derivatives(src_path) + + expect(derivative_service_1).to have_received(:create_derivatives).with(src_path) + expect(derivative_service_2).not_to have_received(:create_derivatives) + end + end + + describe '#valid?' do + subject { described_class.new(file_set).valid? } + + context 'when any of the services are valid' do + let(:service_1_valid) { true } + let(:service_2_valid) { false } + + it { is_expected.to be true } + end + + context 'when none of the services are valid' do + let(:service_1_valid) { false } + let(:service_2_valid) { false } + + it { is_expected.to be false } + end + end + end +end diff --git a/spec/services/spot/image_derivatives_service_spec.rb b/spec/services/spot/image_derivatives_service_spec.rb deleted file mode 100644 index 8e740510e..000000000 --- a/spec/services/spot/image_derivatives_service_spec.rb +++ /dev/null @@ -1,68 +0,0 @@ -# frozen_string_literal: true -RSpec.describe Spot::ImageDerivativesService do - subject(:service) { described_class.new(valid_file_set) } - - before do - allow(valid_file_set).to receive(:mime_type).and_return('image/jpeg') - - allow(::Spot::Derivatives::ThumbnailService).to receive(:new).and_return(thumbnail_service) - allow(::Spot::Derivatives::AccessMasterService).to receive(:new).and_return(access_master_service) - end - - let(:valid_file_set) { FileSet.new } - let(:thumbnail_service) { instance_double(::Spot::Derivatives::ThumbnailService) } - let(:access_master_service) { instance_double(::Spot::Derivatives::AccessMasterService) } - - # since we're delegating +:derivative_url+ to the individual - # derivative services, and not defining one in this service, - # using the +it_behaves_like 'a Hyrax::DerivativeService'+ - # shared_spec will fail. instead, we'll spell out those tasks here - - describe 'behaves like a Hyrax::DerivativeService (sort of)' do - it { is_expected.to respond_to(:cleanup_derivatives).with(0).arguments } - it { is_expected.to respond_to(:create_derivatives).with(1).arguments } - it { is_expected.to respond_to(:file_set) } - it { is_expected.to respond_to(:mime_type) } - end - - it 'is the service for images' do - expect(Hyrax::DerivativeService.for(valid_file_set).class).to eq described_class - end - - describe '#cleanup_derivatives' do - before do - allow(thumbnail_service).to receive(:cleanup_derivatives) - allow(access_master_service).to receive(:cleanup_derivatives) - end - - it 'calls +#cleanup_derivatives+ on both of the services' do - service.cleanup_derivatives - - expect(thumbnail_service).to have_received(:cleanup_derivatives) - expect(access_master_service).to have_received(:cleanup_derivatives) - end - end - - describe '#create_derivatives' do - let(:filename) { '/path/to/an/asset.tif' } - - before do - allow(thumbnail_service).to receive(:create_derivatives) - allow(access_master_service).to receive(:create_derivatives) - allow(FileUtils).to receive(:mkdir_p).with(File.dirname(filename)) - end - - it 'calls +#create_derivatives+ on both of the services' do - service.create_derivatives(filename) - - expect(thumbnail_service).to have_received(:create_derivatives).with(filename) - expect(access_master_service).to have_received(:create_derivatives).with(filename) - end - end - - describe '#valid?' do - subject { service.valid? } - - it { is_expected.to be true } - end -end diff --git a/spec/services/spot/pdf_derivatives_service_spec.rb b/spec/services/spot/pdf_derivatives_service_spec.rb deleted file mode 100644 index a10f6952d..000000000 --- a/spec/services/spot/pdf_derivatives_service_spec.rb +++ /dev/null @@ -1,61 +0,0 @@ -# frozen_string_literal: true -RSpec.describe Spot::PdfDerivativesService do - subject(:service) { described_class.new(valid_file_set) } - - before do - allow(valid_file_set).to receive(:mime_type).and_return('application/pdf') - allow(::Spot::Derivatives::ThumbnailService).to receive(:new).and_return(thumbnail_service) - end - - let(:valid_file_set) { FileSet.new } - let(:thumbnail_service) { instance_double(::Spot::Derivatives::ThumbnailService) } - - # since we're delegating +:derivative_url+ to the individual - # derivative services, and not defining one in this service, - # using the +it_behaves_like 'a Hyrax::DerivativeService'+ - # shared_spec will fail. instead, we'll spell out those tasks here - - describe 'behaves like a Hyrax::DerivativeService (sort of)' do - it { is_expected.to respond_to(:cleanup_derivatives).with(0).arguments } - it { is_expected.to respond_to(:create_derivatives).with(1).arguments } - it { is_expected.to respond_to(:file_set) } - it { is_expected.to respond_to(:mime_type) } - end - - it 'is the service for PDFs' do - expect(Hyrax::DerivativeService.for(valid_file_set).class).to eq described_class - end - - describe '#cleanup_derivatives' do - before do - allow(thumbnail_service).to receive(:cleanup_derivatives) - end - - it 'calls +#cleanup_derivatives+ on both of the services' do - service.cleanup_derivatives - - expect(thumbnail_service).to have_received(:cleanup_derivatives) - end - end - - describe '#create_derivatives' do - let(:filename) { '/path/to/an/asset.pdf' } - - before do - allow(thumbnail_service).to receive(:create_derivatives) - allow(FileUtils).to receive(:mkdir_p).with(File.dirname(filename)) - end - - it 'calls +#create_derivatives+ on the thumbnail services' do - service.create_derivatives(filename) - - expect(thumbnail_service).to have_received(:create_derivatives).with(filename) - end - end - - describe '#valid?' do - subject { service.valid? } - - it { is_expected.to be true } - end -end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index adc18276f..1b62015e2 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -163,7 +163,9 @@ # let webdrivers gem fetch the chrome browser and # account for our aliased services via docker allow: %w[ + googlechromelabs.github.io chromedriver.storage.googleapis.com + storage.googleapis.com db fedora fitsservlet