Skip to content

Commit

Permalink
Bump lucene codec to 99 (#1383)
Browse files Browse the repository at this point in the history
* Add Lucene Codec 9.9

Signed-off-by: Naveen Tatikonda <[email protected]>

* Fix import statements for Lucene95 Codec

Signed-off-by: Naveen Tatikonda <[email protected]>

* Fix SegmentInfo Constructor in Test

Signed-off-by: Naveen Tatikonda <[email protected]>

* Temporarily Ignore Old Codec Tests

Signed-off-by: Naveen Tatikonda <[email protected]>

* Add CHANGELOG

Signed-off-by: Naveen Tatikonda <[email protected]>

* Delete Old Codec Tests

Signed-off-by: Naveen Tatikonda <[email protected]>

---------

Signed-off-by: Naveen Tatikonda <[email protected]>
  • Loading branch information
naveentatikonda authored Jan 11, 2024
1 parent 45282e0 commit 45e9e54
Show file tree
Hide file tree
Showing 13 changed files with 142 additions and 90 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,5 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
* Upgrade urllib to 1.26.17 [#1278](https://github.com/opensearch-project/k-NN/pull/1278)
* Upgrade urllib to 1.26.18 [#1319](https://github.com/opensearch-project/k-NN/pull/1319)
* Upgrade guava to 32.1.3 [#1319](https://github.com/opensearch-project/k-NN/pull/1319)
* Bump lucene codec to 99 [#1383](https://github.com/opensearch-project/k-NN/pull/1383)
### Refactoring
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

package org.opensearch.knn.index.codec.KNN950Codec;

import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat;
import org.apache.lucene.backward_codecs.lucene95.Lucene95HnswVectorsFormat;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat;
import org.opensearch.knn.index.util.KNNEngine;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.codec.KNN990Codec;

import lombok.Builder;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.opensearch.knn.index.codec.KNNCodecVersion;
import org.opensearch.knn.index.codec.KNNFormatFacade;

/**
 * k-NN codec for Lucene 9.9. It is a {@link FilterCodec}: every format that is not overridden
 * below is served by the wrapped delegate codec.
 */
public class KNN990Codec extends FilterCodec {
    private static final KNNCodecVersion VERSION = KNNCodecVersion.V_9_9_0;
    private final KNNFormatFacade knnFormatFacade;
    private final PerFieldKnnVectorsFormat perFieldKnnVectorsFormat;

    /**
     * Creates the codec with the default delegate codec and the default per-field vectors format
     * that {@link KNNCodecVersion#V_9_9_0} provides.
     */
    public KNN990Codec() {
        this(VERSION.getDefaultCodecDelegate(), VERSION.getPerFieldKnnVectorsFormat());
    }

    /**
     * Primary constructor, also exposed through the Lombok-generated builder. Subclasses should
     * declare their own no-arg constructor and forward the delegate codec to this one.
     *
     * @param delegate codec that handles every operation this codec does not override
     * @param knnVectorsFormat per-field format returned by {@link #knnVectorsFormat()}
     */
    @Builder
    protected KNN990Codec(Codec delegate, PerFieldKnnVectorsFormat knnVectorsFormat) {
        super(VERSION.getCodecName(), delegate);
        this.perFieldKnnVectorsFormat = knnVectorsFormat;
        // The facade is derived from the delegate by the supplier registered for this version.
        this.knnFormatFacade = VERSION.getKnnFormatFacadeSupplier().apply(delegate);
    }

    /**
     * @return the per-field vectors format this codec was constructed with
     */
    @Override
    public KnnVectorsFormat knnVectorsFormat() {
        return this.perFieldKnnVectorsFormat;
    }

    /**
     * @return the doc values format exposed by the KNN format facade
     */
    @Override
    public DocValuesFormat docValuesFormat() {
        return this.knnFormatFacade.docValuesFormat();
    }

    /**
     * @return the compound format exposed by the KNN format facade
     */
    @Override
    public CompoundFormat compoundFormat() {
        return this.knnFormatFacade.compoundFormat();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.codec.KNN990Codec;

import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat;
import org.opensearch.knn.index.util.KNNEngine;

import java.util.Optional;

/**
 * Per-field format implementation for the Lucene k-NN vector type, backed by
 * {@link Lucene99HnswVectorsFormat}.
 */
public class KNN990PerFieldKnnVectorsFormat extends BasePerFieldKnnVectorsFormat {

    /**
     * Configures the base per-field format with the Lucene 9.9 HNSW defaults and factories.
     *
     * @param mapperService optional mapper service, passed through to the base class
     */
    public KNN990PerFieldKnnVectorsFormat(final Optional<MapperService> mapperService) {
        super(
            mapperService,
            Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN,
            Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH,
            // Factory for a format built with Lucene's default parameters.
            Lucene99HnswVectorsFormat::new,
            // Factory for a format built with explicit maxConn / beamWidth values.
            (maxConn, beamWidth) -> new Lucene99HnswVectorsFormat(maxConn, beamWidth)
        );
    }

    /**
     * Returns the maximum dimension allowed by {@link KNNEngine} for the Lucene engine.
     *
     * @param fieldName name of the field, ignored
     * @return maximum dimension reported by {@code KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE)}
     */
    @Override
    public int getMaxDimensions(String fieldName) {
        return KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE);
    }
}
22 changes: 20 additions & 2 deletions src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
import org.apache.lucene.backward_codecs.lucene92.Lucene92Codec;
import org.apache.lucene.backward_codecs.lucene94.Lucene94Codec;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene95.Lucene95Codec;
import org.apache.lucene.backward_codecs.lucene95.Lucene95Codec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat;
Expand All @@ -23,6 +24,8 @@
import org.opensearch.knn.index.codec.KNN940Codec.KNN940PerFieldKnnVectorsFormat;
import org.opensearch.knn.index.codec.KNN950Codec.KNN950Codec;
import org.opensearch.knn.index.codec.KNN950Codec.KNN950PerFieldKnnVectorsFormat;
import org.opensearch.knn.index.codec.KNN990Codec.KNN990Codec;
import org.opensearch.knn.index.codec.KNN990Codec.KNN990PerFieldKnnVectorsFormat;

import java.util.Optional;
import java.util.function.BiFunction;
Expand Down Expand Up @@ -92,9 +95,24 @@ public enum KNNCodecVersion {
.knnVectorsFormat(new KNN950PerFieldKnnVectorsFormat(Optional.ofNullable(mapperService)))
.build(),
KNN950Codec::new
),

V_9_9_0(
"KNN990Codec",
new Lucene99Codec(),
new KNN990PerFieldKnnVectorsFormat(Optional.empty()),
(delegate) -> new KNNFormatFacade(
new KNN80DocValuesFormat(delegate.docValuesFormat()),
new KNN80CompoundFormat(delegate.compoundFormat())
),
(userCodec, mapperService) -> KNN990Codec.builder()
.delegate(userCodec)
.knnVectorsFormat(new KNN990PerFieldKnnVectorsFormat(Optional.ofNullable(mapperService)))
.build(),
KNN990Codec::new
);

private static final KNNCodecVersion CURRENT = V_9_5_0;
private static final KNNCodecVersion CURRENT = V_9_9_0;

private final String codecName;
private final Codec defaultCodecDelegate;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ org.opensearch.knn.index.codec.KNN87Codec.KNN87Codec
org.opensearch.knn.index.codec.KNN910Codec.KNN910Codec
org.opensearch.knn.index.codec.KNN920Codec.KNN920Codec
org.opensearch.knn.index.codec.KNN940Codec.KNN940Codec
org.opensearch.knn.index.codec.KNN950Codec.KNN950Codec
org.opensearch.knn.index.codec.KNN950Codec.KNN950Codec
org.opensearch.knn.index.codec.KNN990Codec.KNN990Codec

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.codec.KNN950Codec;
package org.opensearch.knn.index.codec.KNN990Codec;

import lombok.SneakyThrows;
import org.apache.lucene.codecs.Codec;
Expand All @@ -14,35 +14,35 @@
import java.util.Optional;
import java.util.function.Function;

import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_5_0;
import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_9_0;

public class KNN950CodecTests extends KNNCodecTestCase {
public class KNN990CodecTests extends KNNCodecTestCase {

@SneakyThrows
public void testMultiFieldsKnnIndex() {
testMultiFieldsKnnIndex(KNN950Codec.builder().delegate(V_9_5_0.getDefaultCodecDelegate()).build());
testMultiFieldsKnnIndex(KNN990Codec.builder().delegate(V_9_9_0.getDefaultCodecDelegate()).build());
}

@SneakyThrows
public void testBuildFromModelTemplate() {
testBuildFromModelTemplate((KNN950Codec.builder().delegate(V_9_5_0.getDefaultCodecDelegate()).build()));
testBuildFromModelTemplate((KNN990Codec.builder().delegate(V_9_9_0.getDefaultCodecDelegate()).build()));
}

// Ensure that the codec is able to return the correct per field knn vectors format for codec
public void testCodecSetsCustomPerFieldKnnVectorsFormat() {
final Codec codec = new KNN950Codec();
assertTrue(codec.knnVectorsFormat() instanceof KNN950PerFieldKnnVectorsFormat);
final Codec codec = new KNN990Codec();
assertTrue(codec.knnVectorsFormat() instanceof KNN990PerFieldKnnVectorsFormat);
}

// IMPORTANT: When this codec becomes a backward-compatibility codec, this test must be removed, because it attempts to
// write with a read-only codec, which will fail.
@SneakyThrows
public void testKnnVectorIndex() {
Function<MapperService, PerFieldKnnVectorsFormat> perFieldKnnVectorsFormatProvider = (
mapperService) -> new KNN950PerFieldKnnVectorsFormat(Optional.of(mapperService));
mapperService) -> new KNN990PerFieldKnnVectorsFormat(Optional.of(mapperService));

Function<PerFieldKnnVectorsFormat, Codec> knnCodecProvider = (knnVectorFormat) -> KNN950Codec.builder()
.delegate(V_9_5_0.getDefaultCodecDelegate())
Function<PerFieldKnnVectorsFormat, Codec> knnCodecProvider = (knnVectorFormat) -> KNN990Codec.builder()
.delegate(V_9_9_0.getDefaultCodecDelegate())
.knnVectorsFormat(knnVectorFormat)
.build();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.backward_codecs.lucene91.Lucene91Codec;
import org.apache.lucene.backward_codecs.lucene94.Lucene94Codec;
import org.apache.lucene.codecs.lucene95.Lucene95Codec;
import org.apache.lucene.backward_codecs.lucene95.Lucene95Codec;
import org.opensearch.knn.KNNTestCase;

import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_1_0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ public static SegmentInfo newSegmentInfo(final Directory directory, final String
segmentName,
docsInSegment,
false,
false,
codec,
Collections.emptyMap(),
randomByteArrayOfLength(StringHelper.ID_LENGTH),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ public void testQueryScoreForFaissWithModel() throws IOException {
SEGMENT_NAME,
100,
true,
false,
KNNCodecVersion.current().getDefaultCodecDelegate(),
Map.of(),
new byte[StringHelper.ID_LENGTH],
Expand Down Expand Up @@ -270,6 +271,7 @@ public void testShardWithoutFiles() {
SEGMENT_NAME,
100,
false,
false,
KNNCodecVersion.current().getDefaultCodecDelegate(),
Map.of(),
new byte[StringHelper.ID_LENGTH],
Expand Down Expand Up @@ -313,6 +315,7 @@ public void testEmptyQueryResults() {
SEGMENT_NAME,
100,
true,
false,
KNNCodecVersion.current().getDefaultCodecDelegate(),
Map.of(),
new byte[StringHelper.ID_LENGTH],
Expand Down Expand Up @@ -369,6 +372,7 @@ public void testANNWithFilterQuery_whenDoingANN_thenSuccess() {
SEGMENT_NAME,
100,
true,
false,
KNNCodecVersion.current().getDefaultCodecDelegate(),
Map.of(),
new byte[StringHelper.ID_LENGTH],
Expand Down Expand Up @@ -617,6 +621,7 @@ private void testQueryScore(
SEGMENT_NAME,
100,
true,
false,
KNNCodecVersion.current().getDefaultCodecDelegate(),
Map.of(),
new byte[StringHelper.ID_LENGTH],
Expand Down

0 comments on commit 45e9e54

Please sign in to comment.