From 4c9c1857c0fa5899b2dc6b368d9fbfe00480bc0b Mon Sep 17 00:00:00 2001 From: Keith Lawrence Date: Tue, 22 Oct 2024 15:04:00 +0100 Subject: [PATCH] Add landing page block content to search index - Landing pages have "blocks" rather than a body, and by convention we want anything in a block that has the key `content:` to be put into the search index. - However, some blocks (govspeak ones) may be structured to contain multiple content types (e.g. if they were marked up as content-type: text/govspeak, publishing-api will have automatically created a rendered text/html and will present both of these to the search api in the message_queue message). This means that we can't easily use a JSONPath, so instead when we have a details/blocks section we recurse into it. For each element as we travel down the path we either find that it's a content block with structure that includes a text/html content type (in which case we add its content to the search index), a scalar value (in which case we add it to the search index), or another type of array (in which case we recurse into that) --- app/models/concerns/publishing_api/content.rb | 29 +- .../message_queue/landing_page_message.json | 265 ++++++++++++++++++ .../document_synchronization_spec.rb | 32 +++ 3 files changed, 321 insertions(+), 5 deletions(-) create mode 100644 spec/fixtures/files/message_queue/landing_page_message.json diff --git a/app/models/concerns/publishing_api/content.rb b/app/models/concerns/publishing_api/content.rb index 9bfb272..c166efd 100644 --- a/app/models/concerns/publishing_api/content.rb +++ b/app/models/concerns/publishing_api/content.rb @@ -62,17 +62,36 @@ def content values_from_json_paths = INDEXABLE_CONTENT_VALUES_JSON_PATHS.map do |item| item.on(document_hash).map { |body| BodyContent.new(body).html_content } end + values_from_parts = document_hash.dig(:details, :parts)&.map do |part| # Add the part title as a heading to help the search model better understand the structure # of the content ["

#{part[:title]}

", BodyContent.new(part[:body]).html_content] end - [*values_from_json_paths, *values_from_parts] - .flatten - .compact_blank - .join(INDEXABLE_CONTENT_SEPARATOR) - .truncate_bytes(INDEXABLE_CONTENT_MAX_BYTE_SIZE) + [ + *values_from_json_paths, + *values_from_parts, + *values_from_blocks(document_hash.dig(:details, :blocks)), + ].flatten + .compact_blank + .join(INDEXABLE_CONTENT_SEPARATOR) + .truncate_bytes(INDEXABLE_CONTENT_MAX_BYTE_SIZE) + end + + def values_from_blocks(item) + case item + in { content_type: "text/html", content: html_content } + html_content + in { content: String => content } unless item.key?(:content_type) + content + in Hash + item.values.flat_map { values_from_blocks(_1) } + in Array + item.flat_map { values_from_blocks(_1) } + else + nil + end end end end diff --git a/spec/fixtures/files/message_queue/landing_page_message.json b/spec/fixtures/files/message_queue/landing_page_message.json new file mode 100644 index 0000000..867460a --- /dev/null +++ b/spec/fixtures/files/message_queue/landing_page_message.json @@ -0,0 +1,265 @@ +{ + "title": "Landing Page Fixture", + "public_updated_at": "2024-10-21T23:00:00Z", + "publishing_app": "whitehall", + "rendering_app": "frontend", + "update_type": "minor", + "phase": "live", + "analytics_identifier": null, + "document_type": "landing_page", + "schema_name": "landing_page", + "first_published_at": "2024-10-21T23:00:00Z", + "base_path": "/landing-page/search-test", + "description": "Landing page description", + "details": { + "blocks": [ + { + "type": "hero", + "image": { + "alt": "Placeholder alt text", + "sources": { + "mobile": "landing_page/placeholder/mobile.png" + } + }, + "hero_content": { + "blocks": [ + { + "type": "govspeak", + "content": [ + { + "content": "## Govspeak in Hero Example\n\nTHIS SHOULD NOT APPEAR!\n", + "content_type": "text/govspeak" + }, + { + "content_type": "text/html", + "content": "\u003ch2 id=\"govspeak-in-hero-example\"\u003eGovspeak in Hero 
Example\u003c/h2\u003e\n\n\u003cp\u003eMore text\u003c/p\u003e\n" + } + ], + "inverse": true + } + ] + } + }, + { + "type": "featured", + "image": { + "alt": "example alt text", + "sources": { + "mobile": "landing_page/placeholder/mobile.png" + } + }, + "featured_content": { + "blocks": [ + { + "type": "govspeak", + "content": "\u003ch2\u003eGovspeak in featured example\u003c/h2\u003e\n\u003cp\u003eLorem ipsum.\u003c/p\u003e\n", + "inverse": true + } + ] + } + }, + { + "type": "header", + "content": "Header block example" + }, + { + "type": "tabs", + "tab_items": [ + { + "id": "tab-1", + "label": "Item 1", + "content": "Content in tab example one" + }, + { + "id": "tab-2", + "label": "Item 2", + "content": "Content in tab example two" + } + ] + }, + { + "type": "govspeak", + "content": "\u003cp\u003eHand-crafted Govspeak example\u003c/p\u003e" + }, + { + "type": "two_column_layout", + "theme": "two_thirds_one_third", + "blocks": [ + { + "type": "govspeak", + "content": "\u003cp\u003eGovspeak in two column example left\u003c/p\u003e" + } + ] + } + ] + }, + "routes": [ + { + "path": "/landing-page/search-test", + "type": "exact" + } + ], + "redirects": [], + "content_id": "4423de24-06d2-454c-8fc1-2bd9c43087f0", + "locale": "en", + "expanded_links": { + "taxons": [ + { + "content_id": "e48ab80a-de80-4e83-bf59-26316856a5f9", + "title": "Government", + "locale": "en", + "analytics_identifier": null, + "api_path": "/api/content/government/all", + "base_path": "/government/all", + "document_type": "taxon", + "public_updated_at": "2018-09-16T20:29:39Z", + "schema_name": "taxon", + "withdrawn": false, + "description": "", + "details": { + "internal_name": "Government", + "notes_for_editors": "", + "visible_to_departmental_editors": true + }, + "phase": "live", + "links": { + "root_taxon": [ + { + "content_id": "f3bbdec2-0e62-4520-a7fd-6ffd5d36e03a", + "title": "GOV.UK homepage", + "locale": "en", + "analytics_identifier": null, + "api_path": "/api/content/", + "base_path": 
"/", + "document_type": "homepage", + "public_updated_at": "2023-06-28T09:32:34Z", + "schema_name": "homepage", + "withdrawn": false, + "links": {} + } + ] + } + } + ], + "organisations": [ + { + "content_id": "af07d5a5-df63-4ddc-9383-6a666845ebe9", + "title": "Government Digital Service", + "locale": "en", + "analytics_identifier": "OT1056", + "api_path": "/api/content/government/organisations/government-digital-service", + "base_path": "/government/organisations/government-digital-service", + "document_type": "organisation", + "schema_name": "organisation", + "withdrawn": false, + "details": { + "acronym": "GDS", + "logo": { + "crest": "single-identity", + "formatted_title": "Government Digital Service" + }, + "brand": "department-for-science-innovation-and-technology", + "default_news_image": null, + "organisation_govuk_status": { + "url": null, + "status": "live", + "updated_at": null + } + }, + "links": {} + } + ], + "original_primary_publishing_organisation": [ + { + "content_id": "af07d5a5-df63-4ddc-9383-6a666845ebe9", + "title": "Government Digital Service", + "locale": "en", + "analytics_identifier": "OT1056", + "api_path": "/api/content/government/organisations/government-digital-service", + "base_path": "/government/organisations/government-digital-service", + "document_type": "organisation", + "schema_name": "organisation", + "withdrawn": false, + "details": { + "acronym": "GDS", + "logo": { + "crest": "single-identity", + "formatted_title": "Government Digital Service" + }, + "brand": "department-for-science-innovation-and-technology", + "default_news_image": null, + "organisation_govuk_status": { + "url": null, + "status": "live", + "updated_at": null + } + }, + "links": {} + } + ], + "primary_publishing_organisation": [ + { + "content_id": "af07d5a5-df63-4ddc-9383-6a666845ebe9", + "title": "Government Digital Service", + "locale": "en", + "analytics_identifier": "OT1056", + "api_path": 
"/api/content/government/organisations/government-digital-service", + "base_path": "/government/organisations/government-digital-service", + "document_type": "organisation", + "schema_name": "organisation", + "withdrawn": false, + "details": { + "acronym": "GDS", + "logo": { + "crest": "single-identity", + "formatted_title": "Government Digital Service" + }, + "brand": "department-for-science-innovation-and-technology", + "default_news_image": null, + "organisation_govuk_status": { + "url": null, + "status": "live", + "updated_at": null + } + }, + "links": {} + } + ], + "available_translations": [ + { + "title": "Search test", + "public_updated_at": "2024-10-21T23:00:00Z", + "analytics_identifier": null, + "document_type": "landing_page", + "schema_name": "landing_page", + "base_path": "/landing-page/search-test", + "api_path": "/api/content/landing-page/search-test", + "withdrawn": false, + "content_id": "4423de24-06d2-454c-8fc1-2bd9c43087f0", + "locale": "en" + } + ] + }, + "user_journey_document_supertype": "thing", + "email_document_supertype": "other", + "government_document_supertype": "other", + "content_purpose_subgroup": "other", + "content_purpose_supergroup": "other", + "publishing_request_id": "21-1729602038.916-10.1.21.246-2547", + "govuk_request_id": null, + "links": { + "taxons": [ + "e48ab80a-de80-4e83-bf59-26316856a5f9" + ], + "organisations": [ + "af07d5a5-df63-4ddc-9383-6a666845ebe9" + ], + "original_primary_publishing_organisation": [ + "af07d5a5-df63-4ddc-9383-6a666845ebe9" + ], + "primary_publishing_organisation": [ + "af07d5a5-df63-4ddc-9383-6a666845ebe9" + ] + }, + "payload_version": "12345" +} \ No newline at end of file diff --git a/spec/integration/document_synchronization_spec.rb b/spec/integration/document_synchronization_spec.rb index 513cea3..0a9e221 100644 --- a/spec/integration/document_synchronization_spec.rb +++ b/spec/integration/document_synchronization_spec.rb @@ -463,6 +463,38 @@ end end + describe "for a 'landing_page' 
message" do + let(:payload) { json_fixture_as_hash("message_queue/landing_page_message.json") } + + it "is added to Discovery Engine through the Put service" do + expect(DiscoveryEngine::Sync::Put).to have_received(:new).with( + "4423de24-06d2-454c-8fc1-2bd9c43087f0", + { + content_id: "4423de24-06d2-454c-8fc1-2bd9c43087f0", + content_purpose_supergroup: "other", + debug: { + last_synced_at: "1989-12-13T01:02:03+00:00", + payload_version: 12_345, + }, + description: "Landing page description", + document_type: "landing_page", + is_historic: 0, + link: "/landing-page/search-test", + locale: "en", + organisations: %w[government-digital-service], + part_of_taxonomy_tree: %w[f3bbdec2-0e62-4520-a7fd-6ffd5d36e03a e48ab80a-de80-4e83-bf59-26316856a5f9], + public_timestamp: 1_729_551_600, + public_timestamp_datetime: "2024-10-21T23:00:00Z", + title: "Landing Page Fixture", + url: "https://www.gov.uk/landing-page/search-test", + }, + content: "Landing Page Fixture\nLanding page description\n

Govspeak in Hero Example

\n\n

More text

\n\n

Govspeak in featured example

\n

Lorem ipsum.

\n\nHeader block example\nContent in tab example one\nContent in tab example two\n

Hand-crafted Govspeak example

\n

Govspeak in two column example left

", + payload_version: 12_345, + ) + expect(put_service).to have_received(:call) + end + end + describe "for an 'external_content' message" do let(:payload) { json_fixture_as_hash("message_queue/external_content_message.json") }