diff --git a/Procfile b/Procfile index a577ec046..809d483fa 100644 --- a/Procfile +++ b/Procfile @@ -2,4 +2,5 @@ web: bundle exec unicorn -c ./config/unicorn.rb -p ${PORT:-3233} worker: bundle exec sidekiq -C ./config/sidekiq.yml publishing-queue-listener: bundle exec rake message_queue:listen_to_publishing_queue govuk-index-queue-listener: bundle exec rake message_queue:insert_data_into_govuk +specialist-finder-index-queue-listener: bundle exec rake message_queue:insert_data_into_specialist_finder bulk-reindex-queue-listener: bundle exec rake message_queue:bulk_insert_data_into_govuk diff --git a/config/schema/indexes/specialist-finder.json b/config/schema/indexes/specialist-finder.json new file mode 100644 index 000000000..471c6c73f --- /dev/null +++ b/config/schema/indexes/specialist-finder.json @@ -0,0 +1,48 @@ +{ + "elasticsearch_types": [ + "aaib_report", + "ai_assurance_portfolio_technique", + "algorithmic_transparency_record", + "animal_disease_case", + "asylum_support_decision", + "business_finance_support_scheme", + "cma_case", + "contact", + "countryside_stewardship_grant", + "drcf_digital_markets_research", + "drug_safety_update", + "edition", + "employment_appeal_tribunal_decision", + "employment_tribunal_decision", + "european_structural_investment_fund", + "export_health_certificate", + "farming_grant", + "flood_and_coastal_erosion_risk_management_research_report", + "hmrc_manual", + "hmrc_manual_section", + "international_development_fund", + "licence_transaction", + "life_saving_maritime_appliance_service_station", + "maib_report", + "manual", + "manual_section", + "marine_equipment_approved_recommendation", + "marine_notice", + "medical_safety_alert", + "person", + "policy", + "product_safety_alert_report_recall", + "protected_food_drink_name", + "raib_report", + "research_for_development_output", + "residential_property_tribunal_decision", + "traffic_commissioner_regulatory_decision", + "service_manual_guide", + "service_manual_topic", + 
"service_standard_report", + "statutory_instrument", + "tax_tribunal_decision", + "utaac_decision", + "veterans_support_organisation" + ] +} diff --git a/elasticsearch.yml b/elasticsearch.yml index d83dcd193..5e817aedc 100644 --- a/elasticsearch.yml +++ b/elasticsearch.yml @@ -2,6 +2,7 @@ production: &default base_uri: <%= ENV["ELASTICSEARCH_URI"] || 'http://localhost:9200' %> content_index_names: ["detailed", "government"] govuk_index_name: "govuk" + specialist_finder_index_name: "specialist-finder" auxiliary_index_names: ["page-traffic", "metasearch"] registry_index: "government" metasearch_index_name: "metasearch" @@ -23,6 +24,7 @@ test: base_uri: <%= ENV.fetch('ELASTICSEARCH_URI', 'http://localhost:9200') %> content_index_names: ["government_test"] govuk_index_name: "govuk_test" + specialist_finder_index_name: "specialist-finder_test" auxiliary_index_names: ["page-traffic_test", "metasearch_test"] registry_index: "government_test" metasearch_index_name: "metasearch_test" diff --git a/lib/govuk_index/presenters/elasticsearch_presenter.rb b/lib/govuk_index/presenters/elasticsearch_presenter.rb index 40b315684..b1446abbc 100644 --- a/lib/govuk_index/presenters/elasticsearch_presenter.rb +++ b/lib/govuk_index/presenters/elasticsearch_presenter.rb @@ -13,200 +13,53 @@ def type def document { - ai_assurance_technique: specialist.ai_assurance_technique, - aircraft_category: specialist.aircraft_category, - aircraft_type: specialist.aircraft_type, - alert_type: specialist.alert_type, - algorithmic_transparency_record_atrs_version: specialist.algorithmic_transparency_record_atrs_version, - algorithmic_transparency_record_capability: specialist.algorithmic_transparency_record_capability, - algorithmic_transparency_record_date_published: specialist.algorithmic_transparency_record_date_published, - algorithmic_transparency_record_function: specialist.algorithmic_transparency_record_function, - algorithmic_transparency_record_organisation: 
specialist.algorithmic_transparency_record_organisation, - algorithmic_transparency_record_organisation_type: specialist.algorithmic_transparency_record_organisation_type, - algorithmic_transparency_record_other_tags: specialist.algorithmic_transparency_record_other_tags, - algorithmic_transparency_record_phase: specialist.algorithmic_transparency_record_phase, - algorithmic_transparency_record_region: specialist.algorithmic_transparency_record_region, - algorithmic_transparency_record_task: specialist.algorithmic_transparency_record_task, - areas_of_interest: specialist.areas_of_interest, - assessment_date: specialist.assessment_date, - assurance_technique_approach: specialist.assurance_technique_approach, attachments: common_fields.attachments, - authors: specialist.authors, - business_sizes: specialist.business_sizes, - business_stages: specialist.business_stages, - case_state: specialist.case_state, - case_type: specialist.case_type, - category: specialist.category, - certificate_status: specialist.certificate_status, - class_category: specialist.class_category, - closed_date: specialist.closed_date, - closing_date: specialist.closing_date, - commodity_type: specialist.commodity_type, contact_groups: details.contact_groups, content_id: common_fields.content_id, content_purpose_subgroup: common_fields.content_purpose_subgroup, content_purpose_supergroup: common_fields.content_purpose_supergroup, content_store_document_type: common_fields.content_store_document_type, - continuation_link: specialist.continuation_link, - country: specialist.country, - country_of_origin: specialist.country_of_origin, - date_application: specialist.date_application, - date_of_completion: specialist.date_of_completion, - date_of_occurrence: specialist.date_of_occurrence, - date_of_start: specialist.date_of_start, - date_registration: specialist.date_registration, - date_registration_eu: specialist.date_registration_eu, - decision_subject: specialist.decision_subject, description: 
common_fields.description, - destination_country: specialist.destination_country, - development_sector: specialist.development_sector, - digital_market_research_area: specialist.digital_market_research_area, - digital_market_research_category: specialist.digital_market_research_category, - digital_market_research_publish_date: specialist.digital_market_research_publish_date, - digital_market_research_publisher: specialist.digital_market_research_publisher, - digital_market_research_topic: specialist.digital_market_research_topic, - disease_case_closed_date: specialist.disease_case_closed_date, - disease_case_opened_date: specialist.disease_case_opened_date, - disease_type: specialist.disease_type, document_type: type, - eligible_entities: specialist.eligible_entities, email_document_supertype: common_fields.email_document_supertype, - first_published_at: specialist.first_published_at, - flood_and_coastal_erosion_category: specialist.flood_and_coastal_erosion_category, format: common_fields.format, - fund_state: specialist.fund_state, - fund_type: specialist.fund_type, - funding_amount: specialist.funding_amount, - funding_source: specialist.funding_source, government_document_supertype: common_fields.government_document_supertype, government_name: common_fields.government_name, - grant_type: specialist.grant_type, - hidden_indexable_content: specialist.hidden_indexable_content, hmrc_manual_section_id: common_fields.section_id, image_url:, indexable_content: indexable.indexable_content, - industries: specialist.industries, - internal_notes: specialist.internal_notes, is_historic: common_fields.historic?, is_political: common_fields.political?, is_withdrawn: common_fields.withdrawn?, - issued_date: specialist.issued_date, - keyword: specialist.keyword, - key_function: specialist.key_function, - laid_date: specialist.laid_date, - land_use: specialist.land_use, - land_types: specialist.land_types, latest_change_note: details.latest_change_note, licence_identifier: 
details.licence_identifier, - licence_transaction_continuation_link: specialist.licence_transaction_continuation_link, - licence_transaction_industry: specialist.licence_transaction_industry, - licence_transaction_licence_identifier: specialist.licence_transaction_licence_identifier, - licence_transaction_location: specialist.licence_transaction_location, - licence_transaction_will_continue_on: specialist.licence_transaction_will_continue_on, licence_short_description: details.licence_short_description, - life_saving_maritime_appliance_service_station_regions: specialist.life_saving_maritime_appliance_service_station_regions, - life_saving_maritime_appliance_type: specialist.life_saving_maritime_appliance_type, - life_saving_maritime_appliance_manufacturer: specialist.life_saving_maritime_appliance_manufacturer, link: common_fields.link, - location: specialist.location, mainstream_browse_page_content_ids: expanded_links.mainstream_browse_page_content_ids, mainstream_browse_pages: expanded_links.mainstream_browse_pages, manual: details.parent_manual, - marine_notice_topic: specialist.marine_notice_topic, - marine_notice_type: specialist.marine_notice_type, - marine_notice_vessel_type: specialist.marine_notice_vessel_type, - market_sector: specialist.market_sector, - medical_specialism: specialist.medical_specialism, - opened_date: specialist.opened_date, organisation_content_ids: expanded_links.organisation_content_ids, organisations: expanded_links.organisations, - outcome_type: specialist.outcome_type, part_of_taxonomy_tree: expanded_links.part_of_taxonomy_tree, parts: parts.presented_parts, - payment_types: specialist.payment_types, people: expanded_links.people, policy_groups: expanded_links.policy_groups, popularity: common_fields.popularity, popularity_b: common_fields.popularity_b, primary_publishing_organisation: expanded_links.primary_publishing_organisation, - principle: specialist.principle, - product_alert_type: specialist.product_alert_type, - 
product_category: specialist.product_category, - product_measure_type: specialist.product_measure_type, - product_recall_alert_date: specialist.product_recall_alert_date, - product_risk_level: specialist.product_risk_level, - project_code: specialist.project_code, - project_status: specialist.project_status, - protection_type: specialist.protection_type, public_timestamp: common_fields.public_timestamp, publishing_app: common_fields.publishing_app, - railway_type: specialist.railway_type, - reason_for_protection: specialist.reason_for_protection, - reference_number: specialist.reference_number, - regions: specialist.regions, - register: specialist.register, - registered_name: specialist.registered_name, - registration: specialist.registration, rendering_app: common_fields.rendering_app, - report_type: specialist.report_type, - research_document_type: specialist.research_document_type, - result: specialist.result, - review_status: specialist.review_status, role_appointments: expanded_links.role_appointments, roles: expanded_links.roles, - sector: specialist.sector, - service_provider: specialist.service_provider, - sift_end_date: specialist.sift_end_date, - sifting_status: specialist.sifting_status, slug:, - stage: specialist.stage, - status: specialist.status, - subject: specialist.subject, taxons: expanded_links.taxons, - theme: specialist.theme, - therapeutic_area: specialist.therapeutic_area, - tiers_or_standalone_items: specialist.tiers_or_standalone_items, - time_registration: specialist.time_registration, title: common_fields.title, topical_events: expanded_links.topical_events, - topics: specialist.topics, - traditional_term_grapevine_product_category: specialist.traditional_term_grapevine_product_category, - traditional_term_language: specialist.traditional_term_language, - traditional_term_type: specialist.traditional_term_type, - tribunal_decision_categories: specialist.tribunal_decision_categories, - tribunal_decision_category: 
specialist.tribunal_decision_category, - tribunal_decision_country: specialist.tribunal_decision_country, - tribunal_decision_decision_date: specialist.tribunal_decision_decision_date, - tribunal_decision_judges: specialist.tribunal_decision_judges, - tribunal_decision_landmark: specialist.tribunal_decision_landmark, - tribunal_decision_reference_number: specialist.tribunal_decision_reference_number, - tribunal_decision_sub_categories: specialist.tribunal_decision_sub_categories, - tribunal_decision_sub_category: specialist.tribunal_decision_sub_category, - types_of_support: specialist.types_of_support, updated_at: common_fields.updated_at, - use_case: specialist.use_case, user_journey_document_supertype: common_fields.user_journey_document_supertype, - value_of_funding: specialist.value_of_funding, - vessel_type: specialist.vessel_type, - veterans_support_organisation_health_and_social_care: specialist.veterans_support_organisation_health_and_social_care, - veterans_support_organisation_finance: specialist.veterans_support_organisation_finance, - veterans_support_organisation_legal_and_justice: specialist.veterans_support_organisation_legal_and_justice, - veterans_support_organisation_employment_education_and_training: specialist.veterans_support_organisation_employment_education_and_training, - veterans_support_organisation_housing: specialist.veterans_support_organisation_housing, - veterans_support_organisation_families_and_children: specialist.veterans_support_organisation_families_and_children, - veterans_support_organisation_community_and_social: specialist.veterans_support_organisation_community_and_social, - veterans_support_organisation_region_england: specialist.veterans_support_organisation_region_england, - veterans_support_organisation_region_northern_ireland: specialist.veterans_support_organisation_region_northern_ireland, - veterans_support_organisation_region_scotland: specialist.veterans_support_organisation_region_scotland, - 
veterans_support_organisation_region_wales: specialist.veterans_support_organisation_region_wales, view_count: common_fields.view_count, - virus_strain: specialist.virus_strain, - will_continue_on: specialist.will_continue_on, - withdrawn_date: specialist.withdrawn_date, world_locations: expanded_links.world_locations, - year_adopted: specialist.year_adopted, - zone_restriction: specialist.zone_restriction, - zone_type: specialist.zone_type, }.reject { |_, v| v.nil? } end @@ -283,10 +136,6 @@ def expanded_links @expanded_links ||= ExpandedLinksPresenter.new(payload["expanded_links"]) end - def specialist - @specialist ||= SpecialistPresenter.new(payload) - end - def newslike? return false if common_fields.content_store_document_type == "fatality_notice" diff --git a/lib/index/elasticsearch_processor.rb b/lib/index/elasticsearch_processor.rb index 0183231ba..e0ef36f72 100644 --- a/lib/index/elasticsearch_processor.rb +++ b/lib/index/elasticsearch_processor.rb @@ -8,6 +8,10 @@ def self.govuk new(client: GovukIndex::Client) end + def self.specialist_finder + new(client: SpecialistFinderIndex::Client) + end + def initialize(client:) @client = client @actions = [] diff --git a/lib/rummager.rb b/lib/rummager.rb index 9f347fef3..a62d52955 100644 --- a/lib/rummager.rb +++ b/lib/rummager.rb @@ -87,7 +87,9 @@ require "govuk_index/updater" require "govuk_index/client" +require "specialist_finder_index/client" require "govuk_index/document_type_mapper" +require "specialist_finder_index/document_type_mapper" require "govuk_index/page_traffic_job" require "govuk_index/method_builder" require "govuk_index/indexable_content_sanitiser" @@ -101,12 +103,15 @@ require "govuk_index/presenters/elasticsearch_identity" require "govuk_index/presenters/elasticsearch_delete_presenter" require "govuk_index/presenters/elasticsearch_presenter" +require "specialist_finder_index/presenters/elasticsearch_presenter" require "govuk_index/presenters/expanded_links_presenter" require 
"govuk_index/presenters/indexable_content_presenter" require "govuk_index/presenters/parts_presenter" -require "govuk_index/presenters/specialist_presenter" +require "specialist_finder_index/presenters/specialist_presenter" require "govuk_index/publishing_event_processor" +require "specialist_finder_index/publishing_event_processor" require "govuk_index/publishing_event_job" +require "specialist_finder_index/publishing_event_job" require "govuk_index/supertype_updater" require "govuk_index/supertype_job" require "govuk_message_queue_consumer" diff --git a/lib/rummager/app.rb b/lib/rummager/app.rb index f3cbecf82..9c7a193b8 100644 --- a/lib/rummager/app.rb +++ b/lib/rummager/app.rb @@ -66,7 +66,8 @@ def require_authentication(permission) end + # Halt with 403 for indexes that must be updated via the message queue; the response names the index that was requested. def prevent_access_to_govuk - if index_name == "govuk" + if %w[govuk specialist-finder].include?(index_name) - halt(403, "Actions to govuk index are not allowed via this endpoint, please use the message queue to update this index") + halt(403, "Actions to #{index_name} index are not allowed via this endpoint, please use the message queue to update this index") end end @@ -143,6 +144,27 @@ def json_only halt(500, env["sinatra.error"].message) end + # Return results for the Specialist Finder searches + # + # For details, see docs/search-api.md + ["/specialist-documents-search.?:request_format?", "/api/specialist-documents-search.?:request_format?"].each do |path| + get path do + json_only + + query_params = parse_query_string(request.query_string) + + begin + results = SearchConfig.run_specialist_document_search(query_params) + rescue BaseParameterParser::ParseError => e + status 422 + return { error: e.error }.to_json + end + + headers["Access-Control-Allow-Origin"] = "*" + results.to_json + end + end + # Return results for the GOV.UK site search # # For details, see docs/search-api.md diff --git a/lib/search/query_builder.rb b/lib/search/query_builder.rb index e2bfb75a2..1e945498d 100644 --- a/lib/search/query_builder.rb +++ b/lib/search/query_builder.rb @@ -64,12 +64,24 @@ def query end def filter + return specialist_documents_post_filter if 
content_index_names.include?(SearchConfig.specialist_finder_index_name) + Search::FormatMigrator.new( search_params.search_config, base_query: QueryComponents::Filter.new(search_params).payload, ).call end + def specialist_documents_post_filter + { bool: + { + minimum_should_match: 1, + should: [{ + bool: { must: QueryComponents::Filter.new(search_params).payload }, + }], + } } + end + private attr_reader :content_index_names, :metasearch_index diff --git a/lib/search_config.rb b/lib/search_config.rb index d2828663d..b9000f426 100644 --- a/lib/search_config.rb +++ b/lib/search_config.rb @@ -10,6 +10,7 @@ class << self content_index_names spelling_index_names govuk_index_name + specialist_finder_index_name page_traffic_index_name ].each do |config_method| define_method config_method do @@ -42,7 +43,7 @@ def index_names def all_index_names # this is used to process data in the rake file when `all` is passed in as previous we skipped `govuk` # we can't update index_names at this stage as it is used in multiple spots including the index filtering - content_index_names + auxiliary_index_names + [govuk_index_name] + content_index_names + auxiliary_index_names + [govuk_index_name, specialist_finder_index_name] end def run_search(raw_parameters) @@ -51,6 +52,11 @@ def run_search(raw_parameters) search_params.search_config.run_search_with_params(search_params) end + def run_specialist_document_search(raw_parameters) + search_params = parse_parameters(raw_parameters) + search_params.search_config.run_specialist_document_search_with_params(search_params) + end + def run_batch_search(searches) search_params = [] searches.each do |search| @@ -120,6 +126,10 @@ def run_search_with_params(search_params) searcher.run(search_params) end + def run_specialist_document_search_with_params(search_params) + specialist_document_searcher.run(search_params) + end + def run_batch_search_with_params(search_params) batch_searcher.run(search_params) end @@ -148,6 +158,10 @@ def new_content_index 
@new_content_index ||= search_server.index_for_search([SearchConfig.govuk_index_name]) end + def specialist_documents_content_index + @specialist_documents_content_index ||= search_server.index_for_search(SearchConfig.content_index_names + [SearchConfig.specialist_finder_index_name]) + end + def base_uri cluster.uri end @@ -176,6 +190,15 @@ def searcher ) end + def specialist_document_searcher + @specialist_document_searcher ||= Search::Query.new( + content_index: specialist_documents_content_index, + registries:, + metasearch_index:, + spelling_index:, + ) + end + def batch_searcher @batch_searcher ||= Search::BatchQuery.new( content_index:, diff --git a/lib/search_server.rb b/lib/search_server.rb index 132fe9cf0..839b9801b 100644 --- a/lib/search_server.rb +++ b/lib/search_server.rb @@ -12,6 +12,7 @@ def initialize(base_uri, schema, index_names, govuk_index_name, content_index_na @govuk_index_name = govuk_index_name @content_index_names = content_index_names @search_config = search_config + @specialist_finder_index_name = SearchConfig.specialist_finder_index_name end def index_group(prefix) @@ -52,7 +53,7 @@ def validate_index_name!(index_name) def index_name_valid?(index_name) index_name.split(",").all? 
do |name| - @index_names.include?(name) || @govuk_index_name == name + @index_names.include?(name) || @govuk_index_name == name || @specialist_finder_index_name == name end end end diff --git a/lib/specialist_finder_index/client.rb b/lib/specialist_finder_index/client.rb new file mode 100644 index 000000000..af2df0464 --- /dev/null +++ b/lib/specialist_finder_index/client.rb @@ -0,0 +1,11 @@ +module SpecialistFinderIndex + class Client < Index::Client + private + + def index_name + # rubocop:disable Naming/MemoizedInstanceVariableName + @_index ||= SearchConfig.specialist_finder_index_name + # rubocop:enable Naming/MemoizedInstanceVariableName + end + end +end diff --git a/lib/specialist_finder_index/document_type_mapper.rb b/lib/specialist_finder_index/document_type_mapper.rb new file mode 100644 index 000000000..98ea3cd71 --- /dev/null +++ b/lib/specialist_finder_index/document_type_mapper.rb @@ -0,0 +1,17 @@ +module SpecialistFinderIndex + class DocumentTypeMapper + UNPUBLISHING_TYPES = %w[gone redirect substitute vanish].freeze + + def initialize(payload) + @payload = payload + end + + def type + @payload["document_type"] + end + + def unpublishing_type? 
+ UNPUBLISHING_TYPES.include?(@payload["document_type"]) + end + end +end diff --git a/lib/specialist_finder_index/presenters/elasticsearch_presenter.rb b/lib/specialist_finder_index/presenters/elasticsearch_presenter.rb new file mode 100644 index 000000000..002626afc --- /dev/null +++ b/lib/specialist_finder_index/presenters/elasticsearch_presenter.rb @@ -0,0 +1,277 @@ +module SpecialistFinderIndex + class ElasticsearchPresenter + include GovukIndex::ElasticsearchIdentity + + def initialize(payload:, type_mapper:) + @payload = payload + @inferred_type = type_mapper + end + + def type + @type ||= @inferred_type.type + end + + def document + { + attachments: common_fields.attachments, + contact_groups: details.contact_groups, + content_id: common_fields.content_id, + content_purpose_subgroup: common_fields.content_purpose_subgroup, + content_purpose_supergroup: common_fields.content_purpose_supergroup, + content_store_document_type: common_fields.content_store_document_type, + description: common_fields.description, + document_type: type, + email_document_supertype: common_fields.email_document_supertype, + format: common_fields.format, + government_document_supertype: common_fields.government_document_supertype, + government_name: common_fields.government_name, + hmrc_manual_section_id: common_fields.section_id, + image_url: details.image_url, + indexable_content: indexable.indexable_content, + is_historic: common_fields.historic?, + is_political: common_fields.political?, + is_withdrawn: common_fields.withdrawn?, + latest_change_note: details.latest_change_note, + licence_identifier: details.licence_identifier, + licence_short_description: details.licence_short_description, + link: common_fields.link, + mainstream_browse_page_content_ids: expanded_links.mainstream_browse_page_content_ids, + mainstream_browse_pages: expanded_links.mainstream_browse_pages, + manual: details.parent_manual, + organisation_content_ids: expanded_links.organisation_content_ids, + 
organisations: expanded_links.organisations, + part_of_taxonomy_tree: expanded_links.part_of_taxonomy_tree, + parts: parts.presented_parts, + people: expanded_links.people, + policy_groups: expanded_links.policy_groups, + popularity: common_fields.popularity, + popularity_b: common_fields.popularity_b, + primary_publishing_organisation: expanded_links.primary_publishing_organisation, + public_timestamp: common_fields.public_timestamp, + publishing_app: common_fields.publishing_app, + rendering_app: common_fields.rendering_app, + role_appointments: expanded_links.role_appointments, + roles: expanded_links.roles, + taxons: expanded_links.taxons, + title: common_fields.title, + topical_events: expanded_links.topical_events, + updated_at: common_fields.updated_at, + user_journey_document_supertype: common_fields.user_journey_document_supertype, + view_count: common_fields.view_count, + world_locations: expanded_links.world_locations, + }.merge(specialist_fields).reject { |_, v| v.nil? } + end + + def specialist_fields + { + ai_assurance_technique: specialist.ai_assurance_technique, + aircraft_category: specialist.aircraft_category, + aircraft_type: specialist.aircraft_type, + alert_type: specialist.alert_type, + algorithmic_transparency_record_atrs_version: specialist.algorithmic_transparency_record_atrs_version, + algorithmic_transparency_record_capability: specialist.algorithmic_transparency_record_capability, + algorithmic_transparency_record_date_published: specialist.algorithmic_transparency_record_date_published, + algorithmic_transparency_record_function: specialist.algorithmic_transparency_record_function, + algorithmic_transparency_record_organisation: specialist.algorithmic_transparency_record_organisation, + algorithmic_transparency_record_organisation_type: specialist.algorithmic_transparency_record_organisation_type, + algorithmic_transparency_record_other_tags: specialist.algorithmic_transparency_record_other_tags, + algorithmic_transparency_record_phase: 
specialist.algorithmic_transparency_record_phase, + algorithmic_transparency_record_region: specialist.algorithmic_transparency_record_region, + algorithmic_transparency_record_task: specialist.algorithmic_transparency_record_task, + areas_of_interest: specialist.areas_of_interest, + assessment_date: specialist.assessment_date, + assurance_technique_approach: specialist.assurance_technique_approach, + authors: specialist.authors, + business_sizes: specialist.business_sizes, + business_stages: specialist.business_stages, + case_state: specialist.case_state, + case_type: specialist.case_type, + category: specialist.category, + certificate_status: specialist.certificate_status, + class_category: specialist.class_category, + closed_date: specialist.closed_date, + closing_date: specialist.closing_date, + commodity_type: specialist.commodity_type, + continuation_link: specialist.continuation_link, + country: specialist.country, + country_of_origin: specialist.country_of_origin, + date_application: specialist.date_application, + date_of_completion: specialist.date_of_completion, + date_of_occurrence: specialist.date_of_occurrence, + date_of_start: specialist.date_of_start, + date_registration: specialist.date_registration, + date_registration_eu: specialist.date_registration_eu, + decision_subject: specialist.decision_subject, + destination_country: specialist.destination_country, + development_sector: specialist.development_sector, + digital_market_research_area: specialist.digital_market_research_area, + digital_market_research_category: specialist.digital_market_research_category, + digital_market_research_publish_date: specialist.digital_market_research_publish_date, + digital_market_research_publisher: specialist.digital_market_research_publisher, + digital_market_research_topic: specialist.digital_market_research_topic, + disease_case_closed_date: specialist.disease_case_closed_date, + disease_case_opened_date: specialist.disease_case_opened_date, + disease_type: 
specialist.disease_type, + eligible_entities: specialist.eligible_entities, + first_published_at: specialist.first_published_at, + flood_and_coastal_erosion_category: specialist.flood_and_coastal_erosion_category, + fund_state: specialist.fund_state, + fund_type: specialist.fund_type, + funding_amount: specialist.funding_amount, + funding_source: specialist.funding_source, + grant_type: specialist.grant_type, + hidden_indexable_content: specialist.hidden_indexable_content, + industries: specialist.industries, + internal_notes: specialist.internal_notes, + issued_date: specialist.issued_date, + keyword: specialist.keyword, + key_function: specialist.key_function, + laid_date: specialist.laid_date, + land_use: specialist.land_use, + land_types: specialist.land_types, + licence_transaction_continuation_link: specialist.licence_transaction_continuation_link, + licence_transaction_industry: specialist.licence_transaction_industry, + licence_transaction_licence_identifier: specialist.licence_transaction_licence_identifier, + licence_transaction_location: specialist.licence_transaction_location, + licence_transaction_will_continue_on: specialist.licence_transaction_will_continue_on, + life_saving_maritime_appliance_service_station_regions: specialist.life_saving_maritime_appliance_service_station_regions, + life_saving_maritime_appliance_type: specialist.life_saving_maritime_appliance_type, + life_saving_maritime_appliance_manufacturer: specialist.life_saving_maritime_appliance_manufacturer, + location: specialist.location, + marine_notice_topic: specialist.marine_notice_topic, + marine_notice_type: specialist.marine_notice_type, + marine_notice_vessel_type: specialist.marine_notice_vessel_type, + market_sector: specialist.market_sector, + medical_specialism: specialist.medical_specialism, + opened_date: specialist.opened_date, + outcome_type: specialist.outcome_type, + payment_types: specialist.payment_types, + principle: specialist.principle, + product_alert_type: 
specialist.product_alert_type, + product_category: specialist.product_category, + product_measure_type: specialist.product_measure_type, + product_recall_alert_date: specialist.product_recall_alert_date, + product_risk_level: specialist.product_risk_level, + project_code: specialist.project_code, + project_status: specialist.project_status, + protection_type: specialist.protection_type, + railway_type: specialist.railway_type, + reason_for_protection: specialist.reason_for_protection, + reference_number: specialist.reference_number, + regions: specialist.regions, + register: specialist.register, + registered_name: specialist.registered_name, + registration: specialist.registration, + report_type: specialist.report_type, + research_document_type: specialist.research_document_type, + result: specialist.result, + review_status: specialist.review_status, + sector: specialist.sector, + service_provider: specialist.service_provider, + sift_end_date: specialist.sift_end_date, + sifting_status: specialist.sifting_status, + stage: specialist.stage, + status: specialist.status, + subject: specialist.subject, + theme: specialist.theme, + therapeutic_area: specialist.therapeutic_area, + tiers_or_standalone_items: specialist.tiers_or_standalone_items, + time_registration: specialist.time_registration, + topics: specialist.topics, + traditional_term_grapevine_product_category: specialist.traditional_term_grapevine_product_category, + traditional_term_language: specialist.traditional_term_language, + traditional_term_type: specialist.traditional_term_type, + tribunal_decision_categories: specialist.tribunal_decision_categories, + tribunal_decision_category: specialist.tribunal_decision_category, + tribunal_decision_country: specialist.tribunal_decision_country, + tribunal_decision_decision_date: specialist.tribunal_decision_decision_date, + tribunal_decision_judges: specialist.tribunal_decision_judges, + tribunal_decision_landmark: specialist.tribunal_decision_landmark, + 
tribunal_decision_reference_number: specialist.tribunal_decision_reference_number, + tribunal_decision_sub_categories: specialist.tribunal_decision_sub_categories, + tribunal_decision_sub_category: specialist.tribunal_decision_sub_category, + types_of_support: specialist.types_of_support, + use_case: specialist.use_case, + value_of_funding: specialist.value_of_funding, + vessel_type: specialist.vessel_type, + veterans_support_organisation_health_and_social_care: specialist.veterans_support_organisation_health_and_social_care, + veterans_support_organisation_finance: specialist.veterans_support_organisation_finance, + veterans_support_organisation_legal_and_justice: specialist.veterans_support_organisation_legal_and_justice, + veterans_support_organisation_employment_education_and_training: specialist.veterans_support_organisation_employment_education_and_training, + veterans_support_organisation_housing: specialist.veterans_support_organisation_housing, + veterans_support_organisation_families_and_children: specialist.veterans_support_organisation_families_and_children, + veterans_support_organisation_community_and_social: specialist.veterans_support_organisation_community_and_social, + veterans_support_organisation_region_england: specialist.veterans_support_organisation_region_england, + veterans_support_organisation_region_northern_ireland: specialist.veterans_support_organisation_region_northern_ireland, + veterans_support_organisation_region_scotland: specialist.veterans_support_organisation_region_scotland, + veterans_support_organisation_region_wales: specialist.veterans_support_organisation_region_wales, + virus_strain: specialist.virus_strain, + will_continue_on: specialist.will_continue_on, + withdrawn_date: specialist.withdrawn_date, + year_adopted: specialist.year_adopted, + zone_restriction: specialist.zone_restriction, + zone_type: specialist.zone_type, + } + end + + def updated_at + common_fields.updated_at + end + + def format + common_fields.format 
+ end + + def base_path + common_fields.base_path + end + + def link + common_fields.link + end + + def publishing_app + common_fields.publishing_app + end + + def valid! + if format == "recommended-link" + details.url || raise(MissingExternalUrl, "url missing from details section") + else + base_path || raise(NotIdentifiable, "base_path missing from payload") + end + end + + private + + attr_reader :payload + + def indexable + GovukIndex::IndexableContentPresenter.new( + format: common_fields.format, + details: payload["details"], + sanitiser: GovukIndex::IndexableContentSanitiser.new, + ) + end + + def common_fields + @common_fields ||= GovukIndex::CommonFieldsPresenter.new(payload) + end + + def details + @details ||= GovukIndex::DetailsPresenter.new(details: payload["details"], format: common_fields.format) + end + + def parts + @parts ||= GovukIndex::PartsPresenter.new(parts: payload["details"].fetch("parts", [])) + end + + def expanded_links + @expanded_links ||= GovukIndex::ExpandedLinksPresenter.new(payload["expanded_links"]) + end + + def specialist + @specialist ||= SpecialistPresenter.new(payload) + end + end +end diff --git a/lib/govuk_index/presenters/specialist_presenter.rb b/lib/specialist_finder_index/presenters/specialist_presenter.rb similarity index 99% rename from lib/govuk_index/presenters/specialist_presenter.rb rename to lib/specialist_finder_index/presenters/specialist_presenter.rb index 415a1411a..b5d6b0ed9 100644 --- a/lib/govuk_index/presenters/specialist_presenter.rb +++ b/lib/specialist_finder_index/presenters/specialist_presenter.rb @@ -1,6 +1,6 @@ -module GovukIndex +module SpecialistFinderIndex class SpecialistPresenter - extend MethodBuilder + extend GovukIndex::MethodBuilder set_payload_method :metadata diff --git a/lib/specialist_finder_index/publishing_event_job.rb b/lib/specialist_finder_index/publishing_event_job.rb new file mode 100644 index 000000000..02bbbdd22 --- /dev/null +++ 
b/lib/specialist_finder_index/publishing_event_job.rb @@ -0,0 +1,106 @@ +module SpecialistFinderIndex + class ElasticsearchRetryError < StandardError; end + + class ElasticsearchInvalidResponseItemCount < StandardError; end + + class MissingTextHtmlContentType < StandardError; end + + class MultipleMessagesInElasticsearchResponse < StandardError; end + + class NotFoundError < StandardError; end + + class UnknownDocumentTypeError < StandardError; end + + class NotIdentifiable < StandardError; end + + class MissingExternalUrl < StandardError; end + + class PublishingEventJob < BaseJob + notify_of_failures + + def perform(messages) + processor = Index::ElasticsearchProcessor.specialist_finder + + messages.each do |routing_key, payload| + process_action(processor, routing_key, payload) + end + + responses = processor.commit + + (responses || []).each do |response| + process_response(response, messages) + end + # Rescuing exception to guarantee we capture all Sidekiq retries + rescue Exception # rubocop:disable Lint/RescueException + Services.statsd_client.increment("specialist_finder_index.sidekiq-retry") + raise + end + + private + + def process_action(processor, routing_key, payload) + logger.debug("Processing #{routing_key}: #{payload}") + Services.statsd_client.increment("specialist_finder_index.sidekiq-consumed") + + type_mapper = DocumentTypeMapper.new(payload) + + presenter = if type_mapper.unpublishing_type? + GovukIndex::ElasticsearchDeletePresenter.new(payload:) + else + ElasticsearchPresenter.new( + payload: GovukIndex::PayloadPreparer.new(payload).prepare, + type_mapper:, + ) + end + + presenter.valid! + + identifier = "#{presenter.link} #{presenter.type || "'unmapped type'"}" + + if type_mapper.unpublishing_type? 
+ logger.info("#{routing_key} -> DELETE #{identifier}") + processor.delete(presenter) + else + logger.info("#{routing_key} -> INDEX #{identifier}") + processor.save(presenter) + end + + # Rescuing as we don't want to retry this class of error + rescue NotIdentifiable => e + GovukError.notify(e, extra: { message_body: payload }) + # Unpublishing messages for something that does not exist may have been + # processed out of order so we don't want to notify errbit but just allow + # the process to continue + rescue NotFoundError + logger.info("#{payload['base_path']} could not be found.") + Services.statsd_client.increment("specialist_finder_index.not-found-error") + rescue UnknownDocumentTypeError + logger.info("#{payload['document_type']} document type is not known.") + Services.statsd_client.increment("specialist_finder_index.unknown-document-type") + end + + def process_response(response, messages) + messages_with_error = [] + if response["items"].count > 1 + Services.statsd_client.increment("specialist_finder_index.elasticsearch.multiple_responses") + end + + if response["items"].count != messages.count + raise ElasticsearchInvalidResponseItemCount, "received #{response['items'].count} expected #{messages.count}" + end + + response["items"].zip(messages).each do |response_for_message, message| + messages_with_error << message unless Index::ResponseValidator.new(namespace: "specialist_finder_index").valid?(response_for_message) + end + + if messages_with_error.any? + # raise an error so that all messages are retried. + # NOTE: versioned ES actions can be performed multiple with a consistent result. 
+ raise ElasticsearchRetryError.new( + reason: "Elasticsearch failures", + messages: "#{messages_with_error.count} of #{messages.count} failed - see ElasticsearchError's for details", + ) + end + end + end +end diff --git a/lib/specialist_finder_index/publishing_event_processor.rb b/lib/specialist_finder_index/publishing_event_processor.rb new file mode 100644 index 000000000..158200431 --- /dev/null +++ b/lib/specialist_finder_index/publishing_event_processor.rb @@ -0,0 +1,11 @@ +module SpecialistFinderIndex + class PublishingEventProcessor + def process(messages) + messages = Array(messages) # treat a single message as an array with one value + + Services.statsd_client.increment("specialist_finder_index.rabbit-mq-consumed") + SpecialistFinderIndex::PublishingEventJob.perform_async(messages.map { |msg| [msg.delivery_info[:routing_key], msg.payload] }) + messages.each(&:ack) + end + end +end diff --git a/lib/tasks/message_queue.rake b/lib/tasks/message_queue.rake index f2e755ebe..06a935b4d 100644 --- a/lib/tasks/message_queue.rake +++ b/lib/tasks/message_queue.rake @@ -9,6 +9,7 @@ namespace :message_queue do exch = Bunny::Exchange.new(channel, :topic, "published_documents") channel.queue("search_api_to_be_indexed").bind(exch, routing_key: "*.links") channel.queue("search_api_bulk_reindex").bind(exch, routing_key: "*.bulk.reindex") + channel.queue("search_api_specialist_finder_index_documents").bind(exch, routing_key: "specialist_document.*") channel.queue("search_api_govuk_index").bind(exch, routing_key: "*.*") end @@ -28,6 +29,14 @@ namespace :message_queue do ).run end + desc "Gets data from RabbitMQ and insert into specialist finder index" + task :insert_data_into_specialist_finder do + GovukMessageQueueConsumer::Consumer.new( + queue_name: "search_api_specialist_finder_index_documents", + processor: SpecialistFinderIndex::PublishingEventProcessor.new, + ).run + end + desc "Gets data from RabbitMQ and insert into govuk index (bulk reindex queue)" task 
:bulk_insert_data_into_govuk do GovukMessageQueueConsumer::Consumer.new( diff --git a/spec/integration/govuk_index/specialist_formats_spec.rb b/spec/integration/govuk_index/specialist_formats_spec.rb index e59e63ad7..d648a1f1a 100644 --- a/spec/integration/govuk_index/specialist_formats_spec.rb +++ b/spec/integration/govuk_index/specialist_formats_spec.rb @@ -1,6 +1,5 @@ require "spec_helper" - -RSpec.describe "SpecialistFormatTest" do +RSpec.describe "SpecialistFormatsTest" do before do bunny_mock = BunnyMock.new @channel = bunny_mock.start.channel diff --git a/spec/integration/search/search_specialist_documents_spec.rb b/spec/integration/search/search_specialist_documents_spec.rb new file mode 100644 index 000000000..9701e032e --- /dev/null +++ b/spec/integration/search/search_specialist_documents_spec.rb @@ -0,0 +1,545 @@ +require "spec_helper" +require_relative "../../support/search_integration_spec_helper" + +RSpec.configure do |c| + c.include SearchIntegrationSpecHelper +end + +RSpec.describe "SearchSpecialistDocumentsTest" do + let(:index) { "specialist-finder_test" } + + it "returns success" do + get "/specialist-documents-search?q=important" + + expect(last_response).to be_ok + end + + it "spell checking with typo" do + document_params = { + "slug" => "/ministry-of-magic", + "link" => "/ministry-of-magic-site", + "title" => "Ministry of Magic", + } + commit_document("government_test", document_params) + + get "/specialist-documents-search?q=ministry of magick&suggest=spelling" + + expect(parsed_response["suggested_queries"]).to eq(["ministry of magic"]) + end + + it "highlights spelling suggestions" do + document_params = { + "slug" => "/ministry-of-magic", + "link" => "/ministry-of-magic-site", + "title" => "Ministry of Magic", + } + commit_document("government_test", document_params) + + get "/specialist-documents-search?q=ministry of magick&suggest=spelling_with_highlighting" + + expect(parsed_response["suggested_queries"]).to eq([{ + "text" => 
"ministry of magic", + "highlighted" => "ministry of magic", + }]) + end + + it "spell checking with blocklisted typo" do + commit_document( + index, + { + "title" => "Brexitt", + "description" => "Brexitt", + "link" => "/brexitt", + }, + ) + + get "/specialist-documents-search?q=brexit&suggest=spelling" + + expect(parsed_response["suggested_queries"]).to eq([]) + end + + it "spell checking without typo" do + add_sample_documents(index, 1) + + get "/specialist-documents-search?q=milliband" + + expect(parsed_response["suggested_queries"]).to eq([]) + end + + it "sort by date ascending" do + add_sample_documents(index, 2) + + get "/specialist-documents-search?q=important&order=public_timestamp" + + expect(result_links.take(2)).to eq(["/specialist-finder-1", "/specialist-finder-2"]) + end + + it "sort by date descending" do + add_sample_documents(index, 2) + + get "/specialist-documents-search?q=important&order=-public_timestamp" + + # The government links have dates, so appear before all the other links. 
+ # The other documents have no dates, so appear in an undefined order + expect(result_links.take(2)).to eq(["/specialist-finder-1", "/specialist-finder-2"]) + end + + it "sort by title ascending" do + add_sample_documents(index, 1) + + get "/specialist-documents-search?order=title" + lowercase_titles = result_titles.map(&:downcase) + + expect(lowercase_titles).to eq(["sample specialist-finder document 1"]) + end + + it "filter by field" do + add_sample_documents(index, 1) + + get "/specialist-documents-search?filter_mainstream_browse_pages=browse/page/1" + + expect(result_links.sort).to eq(["/specialist-finder-1"]) + end + + it "reject by field" do + add_sample_documents(index, 2) + + get "/specialist-documents-search?reject_mainstream_browse_pages=browse/page/1" + + expect(result_links.sort).to eq(["/specialist-finder-2"]) + end + + it "can filter for missing field" do + add_sample_documents(index, 1) + + get "/specialist-documents-search?filter_manual=_MISSING" + + expect(result_links.sort).to eq(["/specialist-finder-1"]) + end + + it "can filter for missing or specific value in field" do + add_sample_documents(index, 1) + + get "/specialist-documents-search?filter_document_type[]=_MISSING&filter_document_type[]=edition" + + expect(result_links.sort).to eq(["/specialist-finder-1"]) + end + + it "can filter and reject" do + add_sample_documents(index, 2) + + get "/specialist-documents-search?reject_mainstream_browse_pages=1&filter_document_type[]=edition" + + expect(result_links.sort).to eq(["/specialist-finder-1", "/specialist-finder-2"]) + end + + describe "filter/reject when an attribute has multiple values" do + before do + commit_document( + index, + { + "link" => "/one", + "part_of_taxonomy_tree" => %w[a b c], + }, + ) + commit_document( + index, + { + "link" => "/two", + "part_of_taxonomy_tree" => %w[d e f], + }, + ) + commit_document( + index, + { + "link" => "/three", + "part_of_taxonomy_tree" => %w[b e], + }, + ) + end + + describe "filter_all" do + it 
"filters all documents containing taxon b and e" do + get "/specialist-documents-search?filter_all_part_of_taxonomy_tree=b&filter_all_part_of_taxonomy_tree=e" + expect(result_links.sort).to eq([ + "/three", + ]) + end + end + + describe "filter_any" do + it "filters any document containing taxon c or f" do + get "/specialist-documents-search?filter_any_part_of_taxonomy_tree=c&filter_any_part_of_taxonomy_tree=f" + expect(result_links.sort).to match_array([ + "/one", "/two" + ]) + end + end + + describe "reject_all" do + it "rejects all documents containing taxon b and e" do + get "/specialist-documents-search?reject_all_part_of_taxonomy_tree=b&reject_all_part_of_taxonomy_tree=e" + expect(result_links.sort).to match_array([ + "/one", "/two" + ]) + end + end + + describe "reject_any" do + it "rejects any documents containing taxon c or f" do + get "/specialist-documents-search?reject_any_part_of_taxonomy_tree=c&reject_any_part_of_taxonomy_tree=f" + expect(result_links.sort).to match_array([ + "/three", + ]) + end + end + end + + describe "boolean filtering" do + context "when boolean filters are not true or false" do + it "returns an error" do + get "/specialist-documents-search?filter_is_withdrawn=blah" + + expect(last_response.status).to eq(422) + expect(parsed_response).to eq({ "error" => "is_withdrawn requires a boolean (true or false)" }) + end + end + + context "when an invalid filter is used" do + it "returns an error" do + get "/specialist-documents-search?filter_has_some_very_incorrect_filter=false" + + expect(last_response.status).to eq(422) + expect(parsed_response).to eq({ "error" => "\"has_some_very_incorrect_filter\" is not a valid filter field" }) + end + end + + context "when a valid filter is used" do + before do + add_sample_documents(index, 2) + document_params = { + "slug" => "/ministry-of-magic", + "link" => "/ministry-of-magic-site", + "title" => "Ministry of Magic", + "has_official_document" => true, + } + commit_document("government_test", 
document_params) + + document_params = { + "title" => "Advice on Treatment of Dragons", + "link" => "/dragon-guide", + "has_official_document" => false, + } + commit_document("government_test", document_params) + end + + it "can filter on boolean fields = true" do + get "/specialist-documents-search?filter_has_official_document=true" + expect(result_links.sort).to eq(%w[/ministry-of-magic-site]) + end + + it "can filter on boolean fields = false" do + get "/specialist-documents-search?filter_has_official_document=false" + + expect(result_links.sort).to eq(%w[/dragon-guide]) + end + end + end + + it "only contains fields which are present" do + add_sample_documents(index, 2) + + get "/specialist-documents-search?q=important&order=public_timestamp" + + results = parsed_response["results"] + expect(results[1]["title"]).to eq("Sample specialist-finder document 2") + end + + it "validates integer params" do + get "/specialist-documents-search?start=a" + + expect(last_response.status).to eq(422) + expect(parsed_response).to eq({ "error" => "Invalid value \"a\" for parameter \"start\" (expected positive integer)" }) + end + + it "allows integer params leading zeros" do + get "/specialist-documents-search?start=09" + + expect(last_response).to be_ok + end + + it "validates unknown params" do + get "/specialist-documents-search?foo&bar=1" + + expect(last_response.status).to eq(422) + expect(parsed_response).to eq("error" => "Unexpected parameters: foo, bar") + end + + it "debug explain returns explanations" do + add_sample_documents(index, 1) + + get "/specialist-documents-search?debug=explain" + + first_hit_explain = parsed_response["results"].first["_explanation"] + expect(first_hit_explain).not_to be_nil + expect(first_hit_explain.keys).to include("value") + expect(first_hit_explain.keys).to include("description") + expect(first_hit_explain.keys).to include("details") + end + + it "can scope by elasticsearch type" do + commit_document(index, cma_case_attributes, type: 
"cma_case") + + get "/specialist-documents-search?filter_document_type=cma_case" + + expect(last_response).to be_ok + expect(parsed_response.fetch("total")).to eq(1) + expect(parsed_response.fetch("results").fetch(0)).to match( + hash_including( + "document_type" => "cma_case", + "title" => cma_case_attributes.fetch("title"), + "link" => cma_case_attributes.fetch("link"), + ), + ) + end + + it "can filter between dates" do + commit_document(index, cma_case_attributes, type: "cma_case") + + get "/specialist-documents-search?filter_document_type=cma_case&filter_opened_date=from:2014-03-31,to:2014-04-02" + + expect(last_response).to be_ok + expect(parsed_response.fetch("total")).to eq(1) + expect(parsed_response.fetch("results").fetch(0)).to match( + hash_including( + "title" => cma_case_attributes.fetch("title"), + "link" => cma_case_attributes.fetch("link"), + ), + ) + end + + it "can filter between dates with reversed parameter order" do + commit_document(index, cma_case_attributes, type: "cma_case") + + get "/specialist-documents-search?filter_document_type=cma_case&filter_opened_date=to:2014-04-02,from:2014-03-31" + + expect(last_response).to be_ok + expect(parsed_response.fetch("total")).to eq(1) + expect(parsed_response.fetch("results").fetch(0)).to match( + hash_including( + "title" => cma_case_attributes.fetch("title"), + "link" => cma_case_attributes.fetch("link"), + ), + ) + end + + it "can filter from date" do + commit_filter_from_date_documents(index) + get "/specialist-documents-search?filter_document_type=cma_case&filter_opened_date=from:2014-03-31" + + expect(last_response).to be_ok + expect_response_includes_matching_date_and_datetime_results(parsed_response.fetch("results")) + end + + it "can filter from time" do + commit_filter_from_time_documents(index) + get "/specialist-documents-search?filter_document_type=cma_case&filter_opened_date=from:2014-03-31 14:00:00" + + expect(last_response).to be_ok + 
expect_response_includes_matching_date_and_datetime_results(parsed_response.fetch("results")) + end + + it "can filter to date" do + commit_filter_to_date_documents(index) + get "/specialist-documents-search?filter_document_type=cma_case&filter_opened_date=to:2014-04-02" + + expect(last_response).to be_ok + expect_response_includes_matching_date_and_datetime_results(parsed_response.fetch("results")) + end + + it "can filter to time" do + commit_filter_to_time_documents(index) + + get "/specialist-documents-search?filter_document_type=cma_case&filter_opened_date=to:2014-04-02 11:00:00" + + expect(last_response).to be_ok + expect_response_includes_matching_date_and_datetime_results(parsed_response.fetch("results")) + end + + it "can filter times in different time zones" do + commit_document( + index, + cma_case_attributes("opened_date" => "2017-07-01T11:20:00.000-03:00", "link" => "/cma-1"), + type: "cma_case", + ) + commit_document( + index, + cma_case_attributes("opened_date" => "2017-07-02T00:15:00.000+01:00", "link" => "/cma-2"), + type: "cma_case", + ) + + get "/specialist-documents-search?filter_document_type=cma_case&filter_opened_date=from:2017-07-01 12:00,to:2017-07-01 23:30:00" + + expect(last_response).to be_ok + expect(parsed_response.fetch("results")).to contain_exactly( + hash_including("link" => "/cma-1"), + hash_including("link" => "/cma-2"), + ) + end + + it "cannot provide date filter key multiple times" do + get "/specialist-documents-search?filter_document_type=cma_case&filter_opened_date[]=from:2014-03-31&filter_opened_date[]=to:2014-04-02" + + expect(last_response.status).to eq(422) + expect( + parsed_response, + ).to eq( + { "error" => %{Too many values (2) for parameter "opened_date" (must occur at most once)} }, + ) + end + + it "cannot provide invalid dates for date filter" do + get "/specialist-documents-search?filter_document_type=cma_case&filter_opened_date=from:not-a-date" + + expect(last_response.status).to eq(422) + expect( + 
parsed_response, + ).to eq( + { "error" => %{Invalid "from" value "not-a-date" for parameter "opened_date" (expected ISO8601 date)} }, + ) + end + + it "expands organisations" do + document_params = { + "title" => "Advice on Treatment of Dragons", + "link" => "/dragon-guide", + "organisations" => ["/ministry-of-magic"], + } + commit_document("government_test", document_params) + + document_params = { + "slug" => "/ministry-of-magic", + "link" => "/ministry-of-magic-site", + "title" => "Ministry of Magic", + "format" => "organisation", + } + commit_document("government_test", document_params) + + get "/specialist-documents-search.json?q=dragons" + + expect(first_result["organisations"]).to eq( + [{ "slug" => "/ministry-of-magic", + "link" => "/ministry-of-magic-site", + "title" => "Ministry of Magic" }], + ) + end + + it "also works with the /api prefix" do + document_params = { + "slug" => "/ministry-of-magic", + "link" => "/ministry-of-magic-site", + "title" => "Ministry of Magic", + "format" => "organisation", + } + commit_document("government_test", document_params) + + document_params = { + "title" => "Advice on Treatment of Dragons", + "link" => "/dragon-guide", + "organisations" => ["/ministry-of-magic"], + } + commit_document("government_test", document_params) + + get "/api/specialist-documents-search.json?q=dragons" + + expect(first_result["organisations"]).to eq( + [{ "slug" => "/ministry-of-magic", + "link" => "/ministry-of-magic-site", + "title" => "Ministry of Magic" }], + ) + end + + it "expands organisations via content_id" do + document_params = { + "slug" => "/ministry-of-magic", + "link" => "/ministry-of-magic-site", + "title" => "Ministry of Magic", + "content_id" => "organisation-content-id", + "format" => "organisation", + } + commit_document("government_test", document_params) + + document_params = { + "title" => "Advice on Treatment of Dragons", + "link" => "/dragon-guide", + "organisation_content_ids" => %w[organisation-content-id], + } + 
commit_document("government_test", document_params) + + get "/specialist-documents-search.json?q=dragons" + + # Adds a new key with the expanded organisations + expect_result_includes_ministry_of_magic_for_key(first_result, "expanded_organisations", "content_id" => "organisation-content-id") + + # Keeps the organisation content ids + expect( + first_result["organisation_content_ids"], + ).to eq( + %w[organisation-content-id], + ) + end + + it "will show the query" do + get "/specialist-documents-search?q=test&debug=show_query" + + expect(parsed_response.fetch("elasticsearch_query")).to be_truthy + end + + it "will show the cluster" do + get "/specialist-documents-search?q=test" + expect(parsed_response.fetch("es_cluster")).to eq(Clusters.default_cluster.key) + + Clusters.active.each do |cluster| + get "/specialist-documents-search?q=test&ab_tests=search_cluster_query:#{cluster.key}" + expect(parsed_response.fetch("es_cluster")).to eq(cluster.key) + end + end + + it "can return the taxonomy" do + document_params = { + "slug" => "/ministry-of-magic", + "link" => "/ministry-of-magic-site", + "title" => "Ministry of Magic", + "taxons" => %w[eb2093ef-778c-4105-9f33-9aa03d14bc5c], + } + commit_document(index, document_params) + + get "/specialist-documents-search?q=Ministry of Magict&fields[]=taxons" + expect(parsed_response.fetch("total")).to eq(1) + + taxons = parsed_response.dig("results", 0, "taxons") + expect(taxons).to eq(%w[eb2093ef-778c-4105-9f33-9aa03d14bc5c]) + end + +private + + def first_result + @first_result ||= parsed_response["results"].first + end + + def result_links + @result_links ||= parsed_response["results"].map do |result| + result["link"] + end + end + + def result_titles + @result_titles ||= parsed_response["results"].map do |result| + result["title"] + end + end + + def es_score_by_link(link) + parsed_response["results"].find { |result| result["link"] == link }["es_score"] + end +end diff --git 
a/spec/integration/specialist_finder_index/specialist_documents_spec.rb b/spec/integration/specialist_finder_index/specialist_documents_spec.rb new file mode 100644 index 000000000..7ec78419c --- /dev/null +++ b/spec/integration/specialist_finder_index/specialist_documents_spec.rb @@ -0,0 +1,74 @@ +require "spec_helper" + +RSpec.describe "SpecialistDocumentsTest" do + before do + bunny_mock = BunnyMock.new + @channel = bunny_mock.start.channel + + consumer = GovukMessageQueueConsumer::Consumer.new( + queue_name: "bigwig.test", + processor: SpecialistFinderIndex::PublishingEventProcessor.new, + rabbitmq_connection: bunny_mock, + ) + + @queue = @channel.queue("bigwig.test") + consumer.run + end + + it "specialist documents are correctly indexed" do + document_types = %w[ + aaib_report + asylum_support_decision + business_finance_support_scheme + cma_case + countryside_stewardship_grant + drug_safety_update + employment_appeal_tribunal_decision + employment_tribunal_decision + flood_and_coastal_erosion_risk_management_research_report + international_development_fund + licence_transaction + maib_report + medical_safety_alert + protected_food_drink_name + raib_report + research_for_development_output + residential_property_tribunal_decision + service_standard_report + statutory_instrument + tax_tribunal_decision + utaac_decision + ] + + # ideally we would run a test for all document types, but this takes 3 seconds so I have limited + # it to a random subset + document_types.sample(3).each do |specialist_document_type| + random_example = generate_random_example( + schema: "specialist_document", + payload: { document_type: specialist_document_type }, + ) + + @queue.publish(random_example.to_json, content_type: "application/json") + + expect_document_is_in_rummager({ "link" => random_example["base_path"] }, index: "specialist-finder_test", type: specialist_document_type) + end + end + + it "esi documents are correctly indexed" do + publisher_document_type = "esi_fund" + 
search_document_type = "european_structural_investment_fund" + + random_example = generate_random_example( + schema: "specialist_document", + payload: { document_type: publisher_document_type }, + ) + + @queue.publish(random_example.to_json, content_type: "application/json") + + expect_document_is_in_rummager( + { "link" => random_example["base_path"], "format" => search_document_type }, + index: "specialist-finder_test", + type: publisher_document_type, + ) + end +end diff --git a/spec/support/index_helpers.rb b/spec/support/index_helpers.rb index 35459a3d2..9a910f2de 100644 --- a/spec/support/index_helpers.rb +++ b/spec/support/index_helpers.rb @@ -9,7 +9,7 @@ def self.setup_test_indexes end def self.all_index_names - SearchConfig.content_index_names + SearchConfig.auxiliary_index_names + [SearchConfig.govuk_index_name] + SearchConfig.content_index_names + SearchConfig.auxiliary_index_names + [SearchConfig.govuk_index_name, SearchConfig.specialist_finder_index_name] end def self.clean_all diff --git a/spec/support/search_integration_spec_helper.rb b/spec/support/search_integration_spec_helper.rb index cd12f0c44..596ce53e2 100644 --- a/spec/support/search_integration_spec_helper.rb +++ b/spec/support/search_integration_spec_helper.rb @@ -31,93 +31,93 @@ def cma_case_attributes(attributes = {}) }.merge(attributes) end - def commit_filter_from_date_documents + def commit_filter_from_date_documents(index = "govuk_test") commit_document( - "govuk_test", + index, cma_case_attributes("opened_date" => "2014-03-30", "link" => "/old-cma-with-date"), type: "cma_case", ) commit_document( - "govuk_test", + index, cma_case_attributes("opened_date" => "2014-03-30T23:00:00.000+00:00", "link" => "/old-cma-with-datetime"), type: "cma_case", ) commit_document( - "govuk_test", + index, cma_case_attributes("opened_date" => "2014-03-31", "link" => "/matching-cma-with-date"), type: "cma_case", ) commit_document( - "govuk_test", + index, cma_case_attributes("opened_date" => 
"2014-03-31T00:00:00.000+00:00", "link" => "/matching-cma-with-datetime"), type: "cma_case", ) end - def commit_filter_from_time_documents + def commit_filter_from_time_documents(index = "govuk_test") commit_document( - "govuk_test", + index, cma_case_attributes("opened_date" => "2014-03-31", "link" => "/old-cma-with-date"), type: "cma_case", ) commit_document( - "govuk_test", + index, cma_case_attributes("opened_date" => "2014-03-31T13:59:59.000+00:00", "link" => "/old-cma-with-datetime"), type: "cma_case", ) commit_document( - "govuk_test", + index, cma_case_attributes("opened_date" => "2014-04-01", "link" => "/matching-cma-with-date"), type: "cma_case", ) commit_document( - "govuk_test", + index, cma_case_attributes("opened_date" => "2014-03-31T14:00:00.000+00:00", "link" => "/matching-cma-with-datetime"), type: "cma_case", ) end - def commit_filter_to_date_documents + def commit_filter_to_date_documents(index = "govuk_test") commit_document( - "govuk_test", + index, cma_case_attributes("opened_date" => "2014-04-02", "link" => "/matching-cma-with-date"), type: "cma_case", ) commit_document( - "govuk_test", + index, cma_case_attributes("opened_date" => "2014-04-02T05:00:00.000+00:00", "link" => "/matching-cma-with-datetime"), type: "cma_case", ) commit_document( - "govuk_test", + index, cma_case_attributes("opened_date" => "2014-04-03", "link" => "/future-cma-with-date"), type: "cma_case", ) commit_document( - "govuk_test", + index, cma_case_attributes("opened_date" => "2014-04-03T00:00:00.000+00:00", "link" => "/future-cma-with-datetime"), type: "cma_case", ) end - def commit_filter_to_time_documents + def commit_filter_to_time_documents(index = "govuk_test") commit_document( - "govuk_test", + index, cma_case_attributes("opened_date" => "2014-04-02", "link" => "/matching-cma-with-date"), type: "cma_case", ) commit_document( - "govuk_test", + index, cma_case_attributes("opened_date" => "2014-04-02T11:00:00.000+00:00", "link" => "/matching-cma-with-datetime"), 
type: "cma_case", ) commit_document( - "govuk_test", + index, cma_case_attributes("opened_date" => "2014-04-03", "link" => "/future-cma-with-date"), type: "cma_case", ) commit_document( - "govuk_test", + index, cma_case_attributes("opened_date" => "2014-04-02T11:00:01.000+00:00", "link" => "/future-cma-with-datetime"), type: "cma_case", ) diff --git a/spec/unit/govuk_index/specialist_formats_spec.rb b/spec/unit/specialist_finder_index/presenter/specialist_formats_spec.rb similarity index 98% rename from spec/unit/govuk_index/specialist_formats_spec.rb rename to spec/unit/specialist_finder_index/presenter/specialist_formats_spec.rb index 4b9336f19..2d05099df 100644 --- a/spec/unit/govuk_index/specialist_formats_spec.rb +++ b/spec/unit/specialist_finder_index/presenter/specialist_formats_spec.rb @@ -1,6 +1,6 @@ require "spec_helper" -RSpec.describe GovukIndex::ElasticsearchPresenter, "Specialist formats" do +RSpec.describe SpecialistFinderIndex::ElasticsearchPresenter, "Specialist formats" do before do allow_any_instance_of(Indexer::PopularityLookup).to receive(:lookup_popularities).and_return({}) end @@ -256,7 +256,7 @@ def build_example_with_metadata(metadata) payload end - type_mapper = GovukIndex::DocumentTypeMapper.new(example) + type_mapper = SpecialistFinderIndex::DocumentTypeMapper.new(example) described_class.new(payload: example, type_mapper:).document end