From ce2cbce91a611b36bc07742eb4ef7600799e186a Mon Sep 17 00:00:00 2001 From: adam malantonio Date: Mon, 16 Dec 2019 11:09:26 -0500 Subject: [PATCH 1/7] add suggestions from solr (#370) * open the solr docker service ports locally * add suggest services for keyword, source, and publisher * add authorities for the solr suggestion dictionaries * use qa initializer to register solr suggest authorities * get solr suggestions working like we're expecting * add command to rebuild suggest authority dictionary * add specs for the suggestions services * rewrite/reorganize how the authority is set-up to work with qa * set up source's autocomplete form element * add form fields for publisher/keyword + fix specs * prefer jquery-ui interface to select2 for solr suggestions * use document fields where available, use HighFrequency dictionary * tesim fields will split words, use a string field for suggestion sources * register + add form fields for remaining suggested properties * create a solr suggest job + call it at the end of the actor stack --- app/actors/solr_suggest_actor.rb | 44 +++++++ .../qa/authorities/solr_suggest.rb | 120 ++++++++++++++++++ .../update_solr_suggest_dictionaries_job.rb | 10 ++ .../spot/importers/base/record_importer.rb | 24 ++-- .../records/edit_fields/_contributor.html.erb | 12 ++ .../records/edit_fields/_creator.html.erb | 12 ++ .../records/edit_fields/_editor.html.erb | 12 ++ .../records/edit_fields/_keyword.html.erb | 12 ++ .../edit_fields/_organization.html.erb | 12 ++ .../records/edit_fields/_publisher.html.erb | 12 ++ .../records/edit_fields/_source.html.erb | 12 ++ config/initializers/hyrax_overrides.rb | 9 +- config/initializers/local_authorities.rb | 6 - config/initializers/qa.rb | 17 +++ config/sidekiq_schedule.yml | 5 + docker-compose.yml | 4 +- solr/config/schema.xml | 14 +- solr/config/solrconfig.xml | 61 ++++++++- spec/actors/solr_suggest_actor_spec.rb | 60 +++++++++ .../qa/authorities/solr_suggest_spec.rb | 46 +++++++ 
spec/features/create_publication_spec.rb | 27 ++-- ...date_solr_suggest_dictionaries_job_spec.rb | 12 ++ spec/spec_helper.rb | 1 + spec/support/select2_helpers.rb | 17 +++ .../shared_examples/record_importer.rb | 3 +- 25 files changed, 516 insertions(+), 48 deletions(-) create mode 100644 app/actors/solr_suggest_actor.rb create mode 100644 app/authorities/qa/authorities/solr_suggest.rb create mode 100644 app/jobs/spot/update_solr_suggest_dictionaries_job.rb create mode 100644 app/views/records/edit_fields/_contributor.html.erb create mode 100644 app/views/records/edit_fields/_creator.html.erb create mode 100644 app/views/records/edit_fields/_editor.html.erb create mode 100644 app/views/records/edit_fields/_keyword.html.erb create mode 100644 app/views/records/edit_fields/_organization.html.erb create mode 100644 app/views/records/edit_fields/_publisher.html.erb create mode 100644 app/views/records/edit_fields/_source.html.erb delete mode 100644 config/initializers/local_authorities.rb create mode 100644 config/initializers/qa.rb create mode 100644 spec/actors/solr_suggest_actor_spec.rb create mode 100644 spec/authorities/qa/authorities/solr_suggest_spec.rb create mode 100644 spec/jobs/spot/update_solr_suggest_dictionaries_job_spec.rb create mode 100644 spec/support/select2_helpers.rb diff --git a/app/actors/solr_suggest_actor.rb b/app/actors/solr_suggest_actor.rb new file mode 100644 index 000000000..2143fc547 --- /dev/null +++ b/app/actors/solr_suggest_actor.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true +class SolrSuggestActor < ::Hyrax::Actors::AbstractActor + # @param [Hyrax::Actors::Environment] env + # @return [void] + def create(env) + next_actor.create(env) && update_suggest_dictionaries(env) + end + + # @param [Hyrax::Actors::Environment] env + # @return [void] + def update(env) + next_actor.update(env) && update_suggest_dictionaries(env) + end + + # @param [Hyrax::Actors::Environment] env + # @return [void] + def destroy(env) + next_actor.destroy(env) 
&& update_suggest_dictionaries(env) + end + + private + + # Run the job (immediately, via perform_now) to update the solr suggest dictionaries if this actor + # isn't a part of a batch ingest + # + # @param [Hyrax::Actors::Environment] env + # @return [void] + def update_suggest_dictionaries(env) + Spot::UpdateSolrSuggestDictionariesJob.perform_now unless part_of_batch_ingest?(env) + end + + # @return [Symbol] + def batch_ingest_key + ::Spot::Importers::Base::RecordImporter::BATCH_INGEST_KEY + end + + # Do the environment's attributes include the BATCH_INGEST_KEY? + # + # @param [Hyrax::Actors::Environment] env + # @return [true, false] + def part_of_batch_ingest?(env) + env.attributes.include?(batch_ingest_key) && env.attributes[batch_ingest_key] == true + end +end diff --git a/app/authorities/qa/authorities/solr_suggest.rb b/app/authorities/qa/authorities/solr_suggest.rb new file mode 100644 index 000000000..dc0d8d772 --- /dev/null +++ b/app/authorities/qa/authorities/solr_suggest.rb @@ -0,0 +1,120 @@ +# frozen_string_literal: true +module Qa::Authorities + # A base class for building out local authorities to use Solr's + # suggestion engine for autocomplete-options for fields. + # This is a more flexible approach than using Blacklight's + # suggestion search, which appears to only work for a single field. + # + # To begin, first ensure that a suggestion dictionary has been set-up + created + # for your field. In +schema.xml+, you'll want to ensure that a copyfield + # has been created as the pool to draw from. Note: this field needs to be + # a stored field. + # + # @example Configuring a copyfield for suggestions + # + # + # In +solrconfig.xml+, you'll need to build a suggester. The property + # +suggestAnalyzerFieldType+ should be a simple tokenizing field. 
+ # + # @example Configuring a suggestion dictionary + # + # keyword + # AnalyzingInfixLookupFactory + # DocumentDictionaryFactory + # suggestion_index_keyword + # false + # textSuggest + # + # false + # keyword_suggest_ssim + # + # + class SolrSuggest < Qa::Authorities::Base + BUILD_ALL_KEYWORD = :__all__ + + attr_reader :dictionary + + def self.build_dictionaries! + new(BUILD_ALL_KEYWORD).build_dictionary! + end + + def initialize(dictionary) + @dictionary = dictionary + end + + # @return [void] + def build_dictionary! + params = { 'suggest' => true } + + if dictionary == BUILD_ALL_KEYWORD + params['suggest.buildAll'] = true + else + params['suggest.dictionary'] = dictionary + params['suggest.build'] = true + end + + connection.get(suggest_path, params: params) + end + + # @return [RSolr::Client] + + def search(query) + solr_suggestion_for_query(query) + end + + def term(_id) + {} + end + + def all + [] + end + + private + + def connection + ActiveFedora::SolrService.instance.conn + end + + # @return [String] + def suggest_path + @suggest_path ||= begin + url = Rails.application.config_for(:solr)['url'] + URI.join(url + '/', 'suggest').path + end + end + + # @param [String] query + # @return [Array<Hash<Symbol => String>>] + def solr_suggestion_for_query(query) + params = { + 'suggest.q' => query, + 'suggest.dictionary' => dictionary + } + + raw = connection.get(suggest_path, params: params) + parse_raw_response(raw, query: query) + end + + # Takes the Solr response and transforms the results into the + # Questioning Authority preferred format. 
+ # + # @param [Hash<String => *>] raw + # @param [Hash] options + # @option [String] query + # The initial query, used to extract results from the returned Hash + # @return [Array<Hash<Symbol => String>>] + def parse_raw_response(raw, query:) + suggestions = raw.dig('suggest', dictionary, query, 'suggestions') + suggestions ||= [] + + suggestions.map do |res| + { id: res['term'], label: res['term'], value: res['term'] } + end + end + end +end diff --git a/app/jobs/spot/update_solr_suggest_dictionaries_job.rb b/app/jobs/spot/update_solr_suggest_dictionaries_job.rb new file mode 100644 index 000000000..a246432ce --- /dev/null +++ b/app/jobs/spot/update_solr_suggest_dictionaries_job.rb @@ -0,0 +1,10 @@ +# frozen_string_literal: true +module Spot + # Job to update the Solr suggestion engine's dictionaries + class UpdateSolrSuggestDictionariesJob < ::ApplicationJob + # @return [void] + def perform + ::Qa::Authorities::SolrSuggest.build_dictionaries! + end + end +end diff --git a/app/services/spot/importers/base/record_importer.rb b/app/services/spot/importers/base/record_importer.rb index 6ca760b87..9b51dd700 100644 --- a/app/services/spot/importers/base/record_importer.rb +++ b/app/services/spot/importers/base/record_importer.rb @@ -1,17 +1,18 @@ # frozen_string_literal: true -# -# A common-denominator descendent of +Darlingtonia::RecordImporter+ that -# should work for most of our importing use-cases. -# -# @example Using our StreamLogger class for info/error logging -# -# info_stream = Spot::StreamLogger.new(logger, level: ::Logger::INFO) -# error_stream = Spot::StreamLogger.new(logger, level: ::Logger::WARN) -# record_importer = Spot::Importers::Base::RecordImporter.new(work_class, -# info_stream: info_stream, -# error_stream: error_stream) module Spot::Importers::Base + # A common-denominator descendent of +Darlingtonia::RecordImporter+ that + # should work for most of our importing use-cases. 
+ # + # @example Using our StreamLogger class for info/error logging + # + # info_stream = Spot::StreamLogger.new(logger, level: ::Logger::INFO) + # error_stream = Spot::StreamLogger.new(logger, level: ::Logger::WARN) + # record_importer = Spot::Importers::Base::RecordImporter.new(work_class, + # info_stream: info_stream, + # error_stream: error_stream) class RecordImporter < ::Darlingtonia::RecordImporter + BATCH_INGEST_KEY = :__part_of_batch_ingest__ + class_attribute :default_depositor_email, :default_admin_set_id self.default_depositor_email = 'dss@lafayette.edu' self.default_admin_set_id = AdminSet::DEFAULT_ID @@ -62,6 +63,7 @@ def create_for(record:) # @return [Hash Array<*>] def attributes_from_record(record) record.attributes.tap do |attributes| + attributes[BATCH_INGEST_KEY] = true attributes[:remote_files] = create_remote_files_list(record) attributes[:admin_set_id] ||= admin_set_id attributes[:member_of_collections_attributes] = collection_attributes unless collection_ids.empty? 
diff --git a/app/views/records/edit_fields/_contributor.html.erb b/app/views/records/edit_fields/_contributor.html.erb new file mode 100644 index 000000000..ebaa802cf --- /dev/null +++ b/app/views/records/edit_fields/_contributor.html.erb @@ -0,0 +1,12 @@ +<%= + f.input key, + as: :multi_value, + input_html: { + class: 'form-control', + data: { + 'autocomplete-url' => '/authorities/search/local/name', + 'autocomplete' => key, + }, + }, + required: f.object.required?(key) +%> diff --git a/app/views/records/edit_fields/_creator.html.erb b/app/views/records/edit_fields/_creator.html.erb new file mode 100644 index 000000000..ebaa802cf --- /dev/null +++ b/app/views/records/edit_fields/_creator.html.erb @@ -0,0 +1,12 @@ +<%= + f.input key, + as: :multi_value, + input_html: { + class: 'form-control', + data: { + 'autocomplete-url' => '/authorities/search/local/name', + 'autocomplete' => key, + }, + }, + required: f.object.required?(key) +%> diff --git a/app/views/records/edit_fields/_editor.html.erb b/app/views/records/edit_fields/_editor.html.erb new file mode 100644 index 000000000..ebaa802cf --- /dev/null +++ b/app/views/records/edit_fields/_editor.html.erb @@ -0,0 +1,12 @@ +<%= + f.input key, + as: :multi_value, + input_html: { + class: 'form-control', + data: { + 'autocomplete-url' => '/authorities/search/local/name', + 'autocomplete' => key, + }, + }, + required: f.object.required?(key) +%> diff --git a/app/views/records/edit_fields/_keyword.html.erb b/app/views/records/edit_fields/_keyword.html.erb new file mode 100644 index 000000000..8e9366335 --- /dev/null +++ b/app/views/records/edit_fields/_keyword.html.erb @@ -0,0 +1,12 @@ +<%= + f.input key, + as: :multi_value, + input_html: { + class: 'form-control', + data: { + 'autocomplete-url' => '/authorities/search/local/keyword', + 'autocomplete' => key, + }, + }, + required: f.object.required?(key) +%> diff --git a/app/views/records/edit_fields/_organization.html.erb 
b/app/views/records/edit_fields/_organization.html.erb new file mode 100644 index 000000000..77e597fb1 --- /dev/null +++ b/app/views/records/edit_fields/_organization.html.erb @@ -0,0 +1,12 @@ +<%= + f.input key, + as: :multi_value, + input_html: { + class: 'form-control', + data: { + 'autocomplete-url' => '/authorities/search/local/organization', + 'autocomplete' => key, + }, + }, + required: f.object.required?(key) +%> diff --git a/app/views/records/edit_fields/_publisher.html.erb b/app/views/records/edit_fields/_publisher.html.erb new file mode 100644 index 000000000..a1630fa26 --- /dev/null +++ b/app/views/records/edit_fields/_publisher.html.erb @@ -0,0 +1,12 @@ +<%= + f.input key, + as: :multi_value, + input_html: { + class: 'form-control', + data: { + 'autocomplete-url' => '/authorities/search/local/publisher', + 'autocomplete' => key, + }, + }, + required: f.object.required?(key) +%> diff --git a/app/views/records/edit_fields/_source.html.erb b/app/views/records/edit_fields/_source.html.erb new file mode 100644 index 000000000..463410bcf --- /dev/null +++ b/app/views/records/edit_fields/_source.html.erb @@ -0,0 +1,12 @@ +<%= + f.input key, + as: :multi_value, + input_html: { + class: 'form-control', + data: { + 'autocomplete-url' => '/authorities/search/local/source', + 'autocomplete' => key, + }, + }, + required: f.object.required?(key) +%> diff --git a/config/initializers/hyrax_overrides.rb b/config/initializers/hyrax_overrides.rb index 6a195f8ed..ed1f14162 100644 --- a/config/initializers/hyrax_overrides.rb +++ b/config/initializers/hyrax_overrides.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true Rails.application.config.to_prepare do - ## Spot overrides Hyrax - + # Spot overrides Hyrax Hyrax::Dashboard::CollectionsController.presenter_class = Spot::CollectionPresenter Hyrax::Dashboard::CollectionsController.form_class = Spot::Forms::CollectionForm @@ -42,4 +41,10 @@ def output super.encode('UTF-8', invalid: :replace) end end + + # Add our 
SolrSuggestActor to the front of the default actor-stack. This will + # trigger a build of all of the Solr suggestion dictionaries at the end of + # each create, update, destroy process (each method calls the next actor and _then_ + # enqueues the job). + Hyrax::CurationConcern.actor_factory.unshift(SolrSuggestActor) end diff --git a/config/initializers/local_authorities.rb b/config/initializers/local_authorities.rb deleted file mode 100644 index 5174e316f..000000000 --- a/config/initializers/local_authorities.rb +++ /dev/null @@ -1,6 +0,0 @@ -# frozen_string_literal: true -# this was originally in config/initializers/hyrax.rb, -# but buried at the bottom. moving it into its own file -# so that it's easier to find/configure. -# Qa::Authorities::Local.register_subauthority('languages', -# 'Spot::LanguageAuthority') diff --git a/config/initializers/qa.rb b/config/initializers/qa.rb new file mode 100644 index 000000000..642efde17 --- /dev/null +++ b/config/initializers/qa.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true +# +# QuestioningAuthority configuration + +# our Solr suggestion authorities +solr_suggestion_authorities = %w[ + keyword + name + organization + physical_medium + publisher + source +] + +solr_suggestion_authorities.each do |subauth| + Qa::Authorities::Local.register_subauthority(subauth, 'Qa::Authorities::SolrSuggest') +end diff --git a/config/sidekiq_schedule.yml b/config/sidekiq_schedule.yml index 726833962..80dad7289 100644 --- a/config/sidekiq_schedule.yml +++ b/config/sidekiq_schedule.yml @@ -12,3 +12,8 @@ clear_expired_embargoes_and_leases: cron: '0 5 * * *' class: ClearExpiredEmbargoesAndLeasesJob queue: default + +update_solr_suggest_dictionaries: + cron: '0 * * * *' + class: Spot::UpdateSolrSuggestDictionariesJob + queue: default diff --git a/docker-compose.yml b/docker-compose.yml index ae8f4f2ff..065237348 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -63,6 +63,8 @@ services: volumes: - ./solr/config:/spot-config - 
solr:/opt/solr/server/solr + ports: + - '8983:8983' command: solr-create -c spot-development -d /spot-config restart: always @@ -73,7 +75,7 @@ services: - .:/app - ./ingest:/ingest ports: - - "4000:3000" + - "3003:3000" restart: always depends_on: - app diff --git a/solr/config/schema.xml b/solr/config/schema.xml index c0c23f477..ed024e572 100644 --- a/solr/config/schema.xml +++ b/solr/config/schema.xml @@ -165,17 +165,16 @@ - + - - + + - - - + + +