From a1cbe21ccfc95ae4c35406c9c9ab0df99109ffdb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yoann=20Rodi=C3=A8re?=
Date: Wed, 17 Jan 2024 14:38:43 +0100
Subject: [PATCH 1/5] More accurately reflect resource requests/limits in configuration

We know our OpenSearch servers need 2G of memory, so let's request that.
At least I think that's what this article suggests:
https://medium.com/@betz.mark/understanding-resource-limits-in-kubernetes-memory-6b41e9a955f9
---
 src/main/kubernetes/openshift.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/main/kubernetes/openshift.yml b/src/main/kubernetes/openshift.yml
index c2f0dc71..5dddf24a 100644
--- a/src/main/kubernetes/openshift.yml
+++ b/src/main/kubernetes/openshift.yml
@@ -153,10 +153,10 @@ spec:
           resources:
             limits:
               cpu: 2000m
-              memory: 2Gi
+              memory: 3Gi
             requests:
               cpu: 1000m
-              memory: 1Gi
+              memory: 2Gi
           readinessProbe:
             httpGet:
               scheme: HTTP

From f5e7d1ba1f02a51009f6b5da2bb3c6c016bf7cf9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yoann=20Rodi=C3=A8re?=
Date: Wed, 17 Jan 2024 14:40:11 +0100
Subject: [PATCH 2/5] Update instructions to simulate prod in README

---
 README.adoc | 56 ++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 47 insertions(+), 9 deletions(-)

diff --git a/README.adoc b/README.adoc
index a468d1df..b9462268 100644
--- a/README.adoc
+++ b/README.adoc
@@ -107,16 +107,54 @@ Then start it this way:
 [source,shell]
 ----
 podman pod create -p 8080:8080 -p 9000:9000 -p 9200:9200 --name search.quarkus.io
-podman container run -d --name elasticearch --pod search.quarkus.io \
-    -e "discovery.type=single-node" -e "xpack.security.enabled=false" \
-    -e "ES_JAVA_OPTS=-Xms1g -Xmx1g" -e "cluster.routing.allocation.disk.threshold_enabled=false" \
-    docker.io/opensearchproject/opensearch:2.11.0
+# Start multiple OpenSearch containers
+podman container run -d --name search-backend-0 --pod search.quarkus.io \
+    --cpus=2 --memory=2g \
+    -e "node.name=search-backend-0" \
+    -e "discovery.seed_hosts=localhost" \
+    -e "cluster.initial_cluster_manager_nodes=search-backend-0,search-backend-1,search-backend-2" \
+    -e "OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g" \
+    -e "DISABLE_SECURITY_PLUGIN=true" \
+    -e "cluster.routing.allocation.disk.threshold_enabled=false" \
+    opensearch-custom-plugin:2.11.0
+podman container run -d --name search-backend-1 --pod search.quarkus.io \
+    --cpus=2 --memory=2g \
+    -e "node.name=search-backend-1" \
+    -e "discovery.seed_hosts=localhost" \
+    -e "cluster.initial_cluster_manager_nodes=search-backend-0,search-backend-1,search-backend-2" \
+    -e "OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g" \
+    -e "DISABLE_SECURITY_PLUGIN=true" \
+    -e "cluster.routing.allocation.disk.threshold_enabled=false" \
+    opensearch-custom-plugin:2.11.0
+podman container run -d --name search-backend-2 --pod search.quarkus.io \
+    --cpus=2 --memory=2g \
+    -e "node.name=search-backend-2" \
+    -e "discovery.seed_hosts=localhost" \
+    -e "cluster.initial_cluster_manager_nodes=search-backend-0,search-backend-1,search-backend-2" \
+    -e "OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g" \
+    -e "DISABLE_SECURITY_PLUGIN=true" \
+    -e "cluster.routing.allocation.disk.threshold_enabled=false" \
+    opensearch-custom-plugin:2.11.0
 # Then the app; this will fetch the actual data on startup (might take a while):
-podman container run -it --rm --pod search.quarkus.io search-quarkus-io:999-SNAPSHOT
-# OR, if you already have a local clone of quarkus.io:
-podman container run -it --rm --pod search.quarkus.io \
-    -v $HOME/path/to/quarkusio.github.io:/mnt/quarkus-io:ro,z \
-    -e QUARKUSIO_GIT_URI=file:/mnt/quarkus-io \
+podman container run -it --rm --name search.quarkus.io --pod search.quarkus.io search-quarkus-io:999-SNAPSHOT
+# OR, if you already have local clones of *.quarkus.io:
+# (you might need to run quarkus dev with those repos first to get them all in sync)
+REPOS_DIR=$HOME/path/to/dir/containing/repos
+podman container run -it --rm --name search.quarkus.io --pod search.quarkus.io \
+    --cpus=1 --memory=1g \
+    -v $REPOS_DIR/quarkusio.github.io:/mnt/quarkus.io:ro,z \
+    -v $REPOS_DIR/cn.quarkus.io:/mnt/cn.quarkus.io:ro,z \
+    -v $REPOS_DIR/es.quarkus.io:/mnt/es.quarkus.io:ro,z \
+    -v $REPOS_DIR/ja.quarkus.io:/mnt/ja.quarkus.io:ro,z \
+    -v $REPOS_DIR/pt.quarkus.io:/mnt/pt.quarkus.io:ro,z \
+    -e INDEXING_ERROR_REPORTING_TYPE=log \
+    -e GITHUB_OAUTH=ignored \
+    -e GITHUB_STATUS_ISSUE_ID=1 \
+    -e QUARKUSIO_GIT_URI=file:/mnt/quarkus.io \
+    -e QUARKUSIO_LOCALIZED_CN_GIT_URI=file:/mnt/cn.quarkus.io \
+    -e QUARKUSIO_LOCALIZED_ES_GIT_URI=file:/mnt/es.quarkus.io \
+    -e QUARKUSIO_LOCALIZED_JA_GIT_URI=file:/mnt/ja.quarkus.io \
+    -e QUARKUSIO_LOCALIZED_PT_GIT_URI=file:/mnt/pt.quarkus.io \
     search-quarkus-io:999-SNAPSHOT
 ----

From aece2374f595f3a11cdf895f148364565c568a9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yoann=20Rodi=C3=A8re?=
Date: Wed, 17 Jan 2024 14:56:19 +0100
Subject: [PATCH 3/5] Ensure app and backend are placed next to each other on OpenShift

See https://docs.openshift.com/container-platform/4.14/nodes/scheduling/nodes-scheduler-pod-affinity.html
---
 src/main/kubernetes/openshift.yml | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/src/main/kubernetes/openshift.yml b/src/main/kubernetes/openshift.yml
index 5dddf24a..bf36a5e0 100644
--- a/src/main/kubernetes/openshift.yml
+++ b/src/main/kubernetes/openshift.yml
@@ -14,6 +14,18 @@ spec:
   # Without this, we'd end up with a verbatim copy of this (obviously incomplete) DeploymentConfig.
   template:
     spec:
+      # Make sure the app runs in the same zone as the backend
+      # See https://docs.openshift.com/container-platform/4.14/nodes/scheduling/nodes-scheduler-pod-affinity.html
+      affinity:
+        podAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            - labelSelector:
+                matchExpressions:
+                  - key: app.kubernetes.io/name
+                    operator: In
+                    values:
+                      - search-backend
+              topologyKey: topology.kubernetes.io/zone
       containers:
         - name: search-quarkus-io
           # Oddly enough, the quarkus-openshift extension doesn't generate this
@@ -146,6 +158,18 @@ spec:
         app.kubernetes.io/part-of: search-quarkus-io
         app.kubernetes.io/managed-by: quarkus
     spec:
+      # Make sure the backend pods all run in the same zone
+      # See https://docs.openshift.com/container-platform/4.14/nodes/scheduling/nodes-scheduler-pod-affinity.html
+      affinity:
+        podAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            - labelSelector:
+                matchExpressions:
+                  - key: app.kubernetes.io/name
+                    operator: In
+                    values:
+                      - search-backend
+              topologyKey: topology.kubernetes.io/zone
       containers:
         - name: opensearch
          image: opensearch-fixed:current

From 1c7f8ef89b835b75c581d241aaba9b13c55ff70b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yoann=20Rodi=C3=A8re?=
Date: Wed, 17 Jan 2024 15:58:15 +0100
Subject: [PATCH 4/5] Return an approximate total hit count above a given threshold

---
 src/main/java/io/quarkus/search/app/SearchService.java | 4 +++-
 .../java/io/quarkus/search/app/dto/SearchResult.java | 10 +++++++++-
 .../java/io/quarkus/search/app/SearchServiceTest.java | 10 +++++-----
 .../io/quarkus/search/app/indexing/SchedulerTest.java | 2 +-
 4 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/src/main/java/io/quarkus/search/app/SearchService.java b/src/main/java/io/quarkus/search/app/SearchService.java
index 19947b63..d37bad41 100644
--- a/src/main/java/io/quarkus/search/app/SearchService.java
+++ b/src/main/java/io/quarkus/search/app/SearchService.java
@@ -32,6 +32,7 @@ public class SearchService {
     private static final Integer PAGE_SIZE = 50;
+    private static final Integer TOTAL_HIT_COUNT_THRESHOLD = 100;
     private static final String MAX_FOR_PERF_MESSAGE = "{jakarta.validation.constraints.Max.message} for performance reasons";

     @Inject
@@ -105,8 +106,9 @@ public SearchResult search(@RestQuery @DefaultValue(QuarkusVersi
                         f -> f.unified().noMatchSize(0).numberOfFragments(contentSnippets).fragmentSize(contentSnippetsLength))
                 .sort(f -> f.score().then().field("title_sort"))
                 .routing(VersionAndLanguageRoutingBinder.key(version, language))
+                .totalHitCountThreshold(TOTAL_HIT_COUNT_THRESHOLD)
                 .fetch(page * PAGE_SIZE, PAGE_SIZE);
-        return new SearchResult<>(result.total().hitCount(), result.hits());
+        return new SearchResult<>(result);
     }

     private String localizedField(String field, Language language) {
diff --git a/src/main/java/io/quarkus/search/app/dto/SearchResult.java b/src/main/java/io/quarkus/search/app/dto/SearchResult.java
index 2a5821c4..33746255 100644
--- a/src/main/java/io/quarkus/search/app/dto/SearchResult.java
+++ b/src/main/java/io/quarkus/search/app/dto/SearchResult.java
@@ -2,5 +2,13 @@

 import java.util.List;

-public record SearchResult<T>(long total, List<T> hits) {
+public record SearchResult<T>(Total total, List<T> hits) {
+    public SearchResult(org.hibernate.search.engine.search.query.SearchResult<T> result) {
+        this(new Total(result.total().isHitCountExact() ? result.total().hitCount() : null,
+                result.total().hitCountLowerBound()),
+                result.hits());
+    }
+
+    public record Total(Long exact, Long lowerBound) {
+    }
 }
diff --git a/src/test/java/io/quarkus/search/app/SearchServiceTest.java b/src/test/java/io/quarkus/search/app/SearchServiceTest.java
index abe5e498..9cbe1a81 100644
--- a/src/test/java/io/quarkus/search/app/SearchServiceTest.java
+++ b/src/test/java/io/quarkus/search/app/SearchServiceTest.java
@@ -77,7 +77,7 @@ void waitForIndexing() {
     void queryNotMatching() {
         var result = search("termnotmatching");
         assertThat(result.hits()).isEmpty();
-        assertThat(result.total()).isEqualTo(0);
+        assertThat(result.total().exact()).isEqualTo(0);
     }

     @Test
@@ -94,7 +94,7 @@ void queryMatchingFullTerm() {
                         GuideRef.SPRING_DATA_JPA,
                         GuideRef.ALL_CONFIG,
                         GuideRef.ALL_BUILDITEMS));
-        assertThat(result.total()).isEqualTo(9);
+        assertThat(result.total().exact()).isEqualTo(9);
     }

     @Test
@@ -125,7 +125,7 @@ void queryMatchingPrefixTerm() {
                         GuideRef.DUPLICATED_CONTEXT,
                         GuideRef.ALL_CONFIG,
                         GuideRef.ALL_BUILDITEMS));
-        assertThat(result.total()).isEqualTo(10);
+        assertThat(result.total().exact()).isEqualTo(10);
     }

     @Test
@@ -137,7 +137,7 @@ void queryMatchingTwoTerms() {
                         GuideRef.HIBERNATE_SEARCH_ORM_ELASTICSEARCH,
                         GuideRef.ALL_CONFIG,
                         GuideRef.ALL_BUILDITEMS));
-        assertThat(result.total()).isEqualTo(3);
+        assertThat(result.total().exact()).isEqualTo(3);
    }

     @Test
@@ -262,7 +262,7 @@ void projections() {
                         GuideRef.DUPLICATED_CONTEXT,
                         GuideRef.ALL_CONFIG,
                         GuideRef.ALL_BUILDITEMS));
-        assertThat(result.total()).isEqualTo(10);
+        assertThat(result.total().exact()).isEqualTo(10);
     }

     @Test
diff --git a/src/test/java/io/quarkus/search/app/indexing/SchedulerTest.java b/src/test/java/io/quarkus/search/app/indexing/SchedulerTest.java
index f1786440..970f5af2 100644
--- a/src/test/java/io/quarkus/search/app/indexing/SchedulerTest.java
+++ b/src/test/java/io/quarkus/search/app/indexing/SchedulerTest.java
@@ -59,7 +59,7 @@ void scheduler() {
                     .then()
                     .statusCode(200)
                     .extract().body().as(SEARCH_RESULT_SEARCH_HITS)
-                    .total()).isPositive();
+                    .total().lowerBound()).isPositive();
         });
     }

From aff79e685ba320a4d9e3727070862f97be2cf89d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yoann=20Rodi=C3=A8re?=
Date: Wed, 17 Jan 2024 16:20:52 +0100
Subject: [PATCH 5/5] Store term vectors on Guide#fullContent for faster highlighting

---
 src/main/java/io/quarkus/search/app/entity/Guide.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/io/quarkus/search/app/entity/Guide.java b/src/main/java/io/quarkus/search/app/entity/Guide.java
index 538bf73f..382e1c1c 100644
--- a/src/main/java/io/quarkus/search/app/entity/Guide.java
+++ b/src/main/java/io/quarkus/search/app/entity/Guide.java
@@ -70,7 +70,7 @@ public class Guide {
     @Column(length = Length.LONG32)
     public String keywords;

-    @I18nFullTextField(name = "fullContent", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), highlightable = Highlightable.UNIFIED, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH)
+    @I18nFullTextField(name = "fullContent", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH)
     @I18nFullTextField(name = "fullContent_autocomplete", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), analyzerPrefix = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH)
     @Transient
     @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.NO)
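
Note on the totals introduced in patch 4: once `totalHitCountThreshold` is set, Hibernate Search only guarantees an exact hit count up to that threshold; past it, `isHitCountExact()` returns false and only `hitCountLowerBound()` is meaningful, which is why `SearchResult.Total` exposes a nullable `exact` alongside `lowerBound`. The sketch below shows how a consumer of the REST API could render that object; only the `Total` shape mirrors the patch, while the enclosing class and the `render` helper are hypothetical.

[source,java]
----
// Illustrative sketch: only the Total shape mirrors SearchResult.Total from patch 4;
// the enclosing class and render() helper are hypothetical.
public class TotalRenderingSketch {

    // Same shape as io.quarkus.search.app.dto.SearchResult.Total:
    // "exact" is null when the backend stopped counting at the threshold.
    record Total(Long exact, Long lowerBound) {
    }

    // Turns the total into a user-facing label.
    static String render(Total total) {
        if (total.exact() != null) {
            return total.exact() + " results";
        }
        // Past TOTAL_HIT_COUNT_THRESHOLD (100 in patch 4), only a lower bound is known.
        return total.lowerBound() + "+ results";
    }

    public static void main(String[] args) {
        System.out.println(render(new Total(9L, 9L)));     // exact count: "9 results"
        System.out.println(render(new Total(null, 100L))); // approximate: "100+ results"
    }
}
----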