From 8a48f385016464275a73314663fb7ef06bdcc7f1 Mon Sep 17 00:00:00 2001 From: Naveen Tatikonda Date: Thu, 11 Jan 2024 17:26:51 -0600 Subject: [PATCH] Bump lucene codec to 99 (#1383) * Add Lucene Codec 9.9 Signed-off-by: Naveen Tatikonda * Fix import statements for Lucene95 Codec Signed-off-by: Naveen Tatikonda * Fix SegmentInfo Constructor in Test Signed-off-by: Naveen Tatikonda * Temporarily Ignore Old Codec Tests Signed-off-by: Naveen Tatikonda * Add CHANGELOG Signed-off-by: Naveen Tatikonda * Delete Old Codec Tests Signed-off-by: Naveen Tatikonda --------- Signed-off-by: Naveen Tatikonda (cherry picked from commit 45e9e542aef60ef7073ee726e6ac14dec27bfa04) --- CHANGELOG.md | 1 + .../KNN950PerFieldKnnVectorsFormat.java | 2 +- .../index/codec/KNN990Codec/KNN990Codec.java | 61 +++++++++++++++++++ .../KNN990PerFieldKnnVectorsFormat.java | 40 ++++++++++++ .../knn/index/codec/KNNCodecVersion.java | 22 ++++++- .../services/org.apache.lucene.codecs.Codec | 3 +- .../codec/KNN910Codec/KNN910CodecTests.java | 22 ------- .../codec/KNN920Codec/KNN920CodecTests.java | 23 ------- .../codec/KNN940Codec/KNN940CodecTests.java | 30 --------- .../KNN990CodecTests.java} | 20 +++--- .../knn/index/codec/KNNCodecFactoryTests.java | 2 +- .../knn/index/codec/KNNCodecTestUtil.java | 1 + .../knn/index/query/KNNWeightTests.java | 5 ++ 13 files changed, 142 insertions(+), 90 deletions(-) create mode 100644 src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990Codec.java create mode 100644 src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990PerFieldKnnVectorsFormat.java delete mode 100644 src/test/java/org/opensearch/knn/index/codec/KNN910Codec/KNN910CodecTests.java delete mode 100644 src/test/java/org/opensearch/knn/index/codec/KNN920Codec/KNN920CodecTests.java delete mode 100644 src/test/java/org/opensearch/knn/index/codec/KNN940Codec/KNN940CodecTests.java rename src/test/java/org/opensearch/knn/index/codec/{KNN950Codec/KNN950CodecTests.java => 
KNN990Codec/KNN990CodecTests.java} (68%) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9c34eaf9..68041d7ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,4 +34,5 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), * Upgrade urllib to 1.26.17 [#1278](https://github.com/opensearch-project/k-NN/pull/1278) * Upgrade urllib to 1.26.18 [#1319](https://github.com/opensearch-project/k-NN/pull/1319) * Upgrade guava to 32.1.3 [#1319](https://github.com/opensearch-project/k-NN/pull/1319) +* Bump lucene codec to 99 [#1383](https://github.com/opensearch-project/k-NN/pull/1383) ### Refactoring diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java b/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java index d9091b2a7..05ce7271f 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java @@ -5,7 +5,7 @@ package org.opensearch.knn.index.codec.KNN950Codec; -import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat; +import org.apache.lucene.backward_codecs.lucene95.Lucene95HnswVectorsFormat; import org.opensearch.index.mapper.MapperService; import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat; import org.opensearch.knn.index.util.KNNEngine; diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990Codec.java b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990Codec.java new file mode 100644 index 000000000..4b8a1d3cd --- /dev/null +++ b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990Codec.java @@ -0,0 +1,61 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.KNN990Codec; + +import lombok.Builder; +import org.apache.lucene.codecs.Codec; +import 
org.apache.lucene.codecs.CompoundFormat; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FilterCodec; +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.opensearch.knn.index.codec.KNNCodecVersion; +import org.opensearch.knn.index.codec.KNNFormatFacade; + +/** + * KNN Codec that wraps the Lucene Codec which is part of Lucene 9.9 + */ +public class KNN990Codec extends FilterCodec { + private static final KNNCodecVersion VERSION = KNNCodecVersion.V_9_9_0; + private final KNNFormatFacade knnFormatFacade; + private final PerFieldKnnVectorsFormat perFieldKnnVectorsFormat; + + /** + * No arg constructor that uses Lucene99 as the delegate + */ + public KNN990Codec() { + this(VERSION.getDefaultCodecDelegate(), VERSION.getPerFieldKnnVectorsFormat()); + } + + /** + * Constructor with an explicit delegate (the codec name comes from VERSION). When subclassing this codec, + * create a no-arg ctor and pass the delegate codec and the per field KnnVector format to this ctor. 
+ * + * @param delegate codec that will perform all operations this codec does not override + * @param knnVectorsFormat per field format for KnnVector + */ + @Builder + protected KNN990Codec(Codec delegate, PerFieldKnnVectorsFormat knnVectorsFormat) { + super(VERSION.getCodecName(), delegate); + knnFormatFacade = VERSION.getKnnFormatFacadeSupplier().apply(delegate); + perFieldKnnVectorsFormat = knnVectorsFormat; + } + + @Override + public DocValuesFormat docValuesFormat() { + return knnFormatFacade.docValuesFormat(); + } + + @Override + public CompoundFormat compoundFormat() { + return knnFormatFacade.compoundFormat(); + } + + @Override + public KnnVectorsFormat knnVectorsFormat() { + return perFieldKnnVectorsFormat; + } +} diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990PerFieldKnnVectorsFormat.java b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990PerFieldKnnVectorsFormat.java new file mode 100644 index 000000000..abf40f2ef --- /dev/null +++ b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990PerFieldKnnVectorsFormat.java @@ -0,0 +1,40 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.KNN990Codec; + +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat; +import org.opensearch.knn.index.util.KNNEngine; + +import java.util.Optional; + +/** + * Class provides per field format implementation for Lucene Knn vector type + */ +public class KNN990PerFieldKnnVectorsFormat extends BasePerFieldKnnVectorsFormat { + + public KNN990PerFieldKnnVectorsFormat(final Optional mapperService) { + super( + mapperService, + Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN, + Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH, + () -> new Lucene99HnswVectorsFormat(), + (maxConnm, beamWidth) -> new 
Lucene99HnswVectorsFormat(maxConnm, beamWidth) + ); + } + + @Override + /** + * This method returns the maximum dimension allowed from KNNEngine for Lucene codec + * + * @param fieldName Name of the field, ignored + * @return Maximum constant dimension set by KNNEngine + */ + public int getMaxDimensions(String fieldName) { + return KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE); + } +} diff --git a/src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java b/src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java index cbf6680f7..505dd50a5 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java @@ -11,7 +11,8 @@ import org.apache.lucene.backward_codecs.lucene92.Lucene92Codec; import org.apache.lucene.codecs.Codec; import org.apache.lucene.backward_codecs.lucene94.Lucene94Codec; -import org.apache.lucene.codecs.lucene95.Lucene95Codec; +import org.apache.lucene.backward_codecs.lucene95.Lucene95Codec; +import org.apache.lucene.codecs.lucene99.Lucene99Codec; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.opensearch.index.mapper.MapperService; import org.opensearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat; @@ -23,6 +24,8 @@ import org.opensearch.knn.index.codec.KNN940Codec.KNN940PerFieldKnnVectorsFormat; import org.opensearch.knn.index.codec.KNN950Codec.KNN950Codec; import org.opensearch.knn.index.codec.KNN950Codec.KNN950PerFieldKnnVectorsFormat; +import org.opensearch.knn.index.codec.KNN990Codec.KNN990Codec; +import org.opensearch.knn.index.codec.KNN990Codec.KNN990PerFieldKnnVectorsFormat; import java.util.Optional; import java.util.function.BiFunction; @@ -92,9 +95,24 @@ public enum KNNCodecVersion { .knnVectorsFormat(new KNN950PerFieldKnnVectorsFormat(Optional.ofNullable(mapperService))) .build(), KNN950Codec::new + ), + + V_9_9_0( + "KNN990Codec", + new Lucene99Codec(), + new 
KNN990PerFieldKnnVectorsFormat(Optional.empty()), + (delegate) -> new KNNFormatFacade( + new KNN80DocValuesFormat(delegate.docValuesFormat()), + new KNN80CompoundFormat(delegate.compoundFormat()) + ), + (userCodec, mapperService) -> KNN990Codec.builder() + .delegate(userCodec) + .knnVectorsFormat(new KNN990PerFieldKnnVectorsFormat(Optional.ofNullable(mapperService))) + .build(), + KNN990Codec::new ); - private static final KNNCodecVersion CURRENT = V_9_5_0; + private static final KNNCodecVersion CURRENT = V_9_9_0; private final String codecName; private final Codec defaultCodecDelegate; diff --git a/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec index 5c44d5756..308b37967 100644 --- a/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -5,4 +5,5 @@ org.opensearch.knn.index.codec.KNN87Codec.KNN87Codec org.opensearch.knn.index.codec.KNN910Codec.KNN910Codec org.opensearch.knn.index.codec.KNN920Codec.KNN920Codec org.opensearch.knn.index.codec.KNN940Codec.KNN940Codec -org.opensearch.knn.index.codec.KNN950Codec.KNN950Codec \ No newline at end of file +org.opensearch.knn.index.codec.KNN950Codec.KNN950Codec +org.opensearch.knn.index.codec.KNN990Codec.KNN990Codec \ No newline at end of file diff --git a/src/test/java/org/opensearch/knn/index/codec/KNN910Codec/KNN910CodecTests.java b/src/test/java/org/opensearch/knn/index/codec/KNN910Codec/KNN910CodecTests.java deleted file mode 100644 index 1ec28d6a4..000000000 --- a/src/test/java/org/opensearch/knn/index/codec/KNN910Codec/KNN910CodecTests.java +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.knn.index.codec.KNN910Codec; - -import org.opensearch.knn.index.codec.KNNCodecTestCase; - -import java.io.IOException; -import 
java.util.concurrent.ExecutionException; - -public class KNN910CodecTests extends KNNCodecTestCase { - - public void testMultiFieldsKnnIndex() throws Exception { - testMultiFieldsKnnIndex(new KNN910Codec()); - } - - public void testBuildFromModelTemplate() throws InterruptedException, ExecutionException, IOException { - testBuildFromModelTemplate(new KNN910Codec()); - } -} diff --git a/src/test/java/org/opensearch/knn/index/codec/KNN920Codec/KNN920CodecTests.java b/src/test/java/org/opensearch/knn/index/codec/KNN920Codec/KNN920CodecTests.java deleted file mode 100644 index 8cdfc2d69..000000000 --- a/src/test/java/org/opensearch/knn/index/codec/KNN920Codec/KNN920CodecTests.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.knn.index.codec.KNN920Codec; - -import org.opensearch.knn.index.codec.KNNCodecTestCase; -import java.io.IOException; -import java.util.concurrent.ExecutionException; - -import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_2_0; - -public class KNN920CodecTests extends KNNCodecTestCase { - - public void testMultiFieldsKnnIndex() throws Exception { - testMultiFieldsKnnIndex(KNN920Codec.builder().delegate(V_9_2_0.getDefaultCodecDelegate()).build()); - } - - public void testBuildFromModelTemplate() throws InterruptedException, ExecutionException, IOException { - testBuildFromModelTemplate((KNN920Codec.builder().delegate(V_9_2_0.getDefaultCodecDelegate()).build())); - } -} diff --git a/src/test/java/org/opensearch/knn/index/codec/KNN940Codec/KNN940CodecTests.java b/src/test/java/org/opensearch/knn/index/codec/KNN940Codec/KNN940CodecTests.java deleted file mode 100644 index 805edac9d..000000000 --- a/src/test/java/org/opensearch/knn/index/codec/KNN940Codec/KNN940CodecTests.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package 
org.opensearch.knn.index.codec.KNN940Codec; - -import org.apache.lucene.codecs.Codec; -import org.opensearch.knn.index.codec.KNNCodecTestCase; -import java.io.IOException; -import java.util.concurrent.ExecutionException; - -import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_4_0; - -public class KNN940CodecTests extends KNNCodecTestCase { - - public void testMultiFieldsKnnIndex() throws Exception { - testMultiFieldsKnnIndex(KNN940Codec.builder().delegate(V_9_4_0.getDefaultCodecDelegate()).build()); - } - - public void testBuildFromModelTemplate() throws InterruptedException, ExecutionException, IOException { - testBuildFromModelTemplate((KNN940Codec.builder().delegate(V_9_4_0.getDefaultCodecDelegate()).build())); - } - - // Ensure that the codec is able to return the correct per field knn vectors format for codec - public void testCodecSetsCustomPerFieldKnnVectorsFormat() { - final Codec codec = new KNN940Codec(); - assertTrue(codec.knnVectorsFormat() instanceof KNN940PerFieldKnnVectorsFormat); - } -} diff --git a/src/test/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950CodecTests.java b/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990CodecTests.java similarity index 68% rename from src/test/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950CodecTests.java rename to src/test/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990CodecTests.java index 8eafb6a4a..307ebbb24 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950CodecTests.java +++ b/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990CodecTests.java @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.knn.index.codec.KNN950Codec; +package org.opensearch.knn.index.codec.KNN990Codec; import lombok.SneakyThrows; import org.apache.lucene.codecs.Codec; @@ -14,24 +14,24 @@ import java.util.Optional; import java.util.function.Function; -import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_5_0; +import 
static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_9_0; -public class KNN950CodecTests extends KNNCodecTestCase { +public class KNN990CodecTests extends KNNCodecTestCase { @SneakyThrows public void testMultiFieldsKnnIndex() { - testMultiFieldsKnnIndex(KNN950Codec.builder().delegate(V_9_5_0.getDefaultCodecDelegate()).build()); + testMultiFieldsKnnIndex(KNN990Codec.builder().delegate(V_9_9_0.getDefaultCodecDelegate()).build()); } @SneakyThrows public void testBuildFromModelTemplate() { - testBuildFromModelTemplate((KNN950Codec.builder().delegate(V_9_5_0.getDefaultCodecDelegate()).build())); + testBuildFromModelTemplate((KNN990Codec.builder().delegate(V_9_9_0.getDefaultCodecDelegate()).build())); } // Ensure that the codec is able to return the correct per field knn vectors format for codec public void testCodecSetsCustomPerFieldKnnVectorsFormat() { - final Codec codec = new KNN950Codec(); - assertTrue(codec.knnVectorsFormat() instanceof KNN950PerFieldKnnVectorsFormat); + final Codec codec = new KNN990Codec(); + assertTrue(codec.knnVectorsFormat() instanceof KNN990PerFieldKnnVectorsFormat); } // IMPORTANT: When this Codec is moved to a backwards Codec, this test needs to be removed, because it attempts to @@ -39,10 +39,10 @@ public void testCodecSetsCustomPerFieldKnnVectorsFormat() { @SneakyThrows public void testKnnVectorIndex() { Function perFieldKnnVectorsFormatProvider = ( - mapperService) -> new KNN950PerFieldKnnVectorsFormat(Optional.of(mapperService)); + mapperService) -> new KNN990PerFieldKnnVectorsFormat(Optional.of(mapperService)); - Function knnCodecProvider = (knnVectorFormat) -> KNN950Codec.builder() - .delegate(V_9_5_0.getDefaultCodecDelegate()) + Function knnCodecProvider = (knnVectorFormat) -> KNN990Codec.builder() + .delegate(V_9_9_0.getDefaultCodecDelegate()) .knnVectorsFormat(knnVectorFormat) .build(); diff --git a/src/test/java/org/opensearch/knn/index/codec/KNNCodecFactoryTests.java 
b/src/test/java/org/opensearch/knn/index/codec/KNNCodecFactoryTests.java index 2ec953b18..29dae6085 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNNCodecFactoryTests.java +++ b/src/test/java/org/opensearch/knn/index/codec/KNNCodecFactoryTests.java @@ -9,7 +9,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.backward_codecs.lucene91.Lucene91Codec; import org.apache.lucene.backward_codecs.lucene94.Lucene94Codec; -import org.apache.lucene.codecs.lucene95.Lucene95Codec; +import org.apache.lucene.backward_codecs.lucene95.Lucene95Codec; import org.opensearch.knn.KNNTestCase; import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_1_0; diff --git a/src/test/java/org/opensearch/knn/index/codec/KNNCodecTestUtil.java b/src/test/java/org/opensearch/knn/index/codec/KNNCodecTestUtil.java index ad0cd37a0..08dedb0e7 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNNCodecTestUtil.java +++ b/src/test/java/org/opensearch/knn/index/codec/KNNCodecTestUtil.java @@ -363,6 +363,7 @@ public static SegmentInfo newSegmentInfo(final Directory directory, final String segmentName, docsInSegment, false, + false, codec, Collections.emptyMap(), randomByteArrayOfLength(StringHelper.ID_LENGTH), diff --git a/src/test/java/org/opensearch/knn/index/query/KNNWeightTests.java b/src/test/java/org/opensearch/knn/index/query/KNNWeightTests.java index 79e41b52f..a71f25822 100644 --- a/src/test/java/org/opensearch/knn/index/query/KNNWeightTests.java +++ b/src/test/java/org/opensearch/knn/index/query/KNNWeightTests.java @@ -181,6 +181,7 @@ public void testQueryScoreForFaissWithModel() throws IOException { SEGMENT_NAME, 100, true, + false, KNNCodecVersion.current().getDefaultCodecDelegate(), Map.of(), new byte[StringHelper.ID_LENGTH], @@ -270,6 +271,7 @@ public void testShardWithoutFiles() { SEGMENT_NAME, 100, false, + false, KNNCodecVersion.current().getDefaultCodecDelegate(), Map.of(), new byte[StringHelper.ID_LENGTH], @@ -313,6 +315,7 @@ public void 
testEmptyQueryResults() { SEGMENT_NAME, 100, true, + false, KNNCodecVersion.current().getDefaultCodecDelegate(), Map.of(), new byte[StringHelper.ID_LENGTH], @@ -369,6 +372,7 @@ public void testANNWithFilterQuery_whenDoingANN_thenSuccess() { SEGMENT_NAME, 100, true, + false, KNNCodecVersion.current().getDefaultCodecDelegate(), Map.of(), new byte[StringHelper.ID_LENGTH], @@ -617,6 +621,7 @@ private void testQueryScore( SEGMENT_NAME, 100, true, + false, KNNCodecVersion.current().getDefaultCodecDelegate(), Map.of(), new byte[StringHelper.ID_LENGTH],