diff --git a/CHANGELOG.md b/CHANGELOG.md index eff969e01..7ef36dc57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add check to directly use ANN Search when filters match all docs. (#2320)[https://github.com/opensearch-project/k-NN/pull/2320] ### Bug Fixes * Fixing the bug when a segment has no vector field present for disk based vector search (#2282)[https://github.com/opensearch-project/k-NN/pull/2282] +* Fix for NPE while merging segments after all the vector fields docs are deleted (#2365)[https://github.com/opensearch-project/k-NN/pull/2365] * Allow validation for non knn index only after 2.17.0 (#2315)[https://github.com/opensearch-project/k-NN/pull/2315] * Release query vector memory after execution (#2346)[https://github.com/opensearch-project/k-NN/pull/2346] * Fix shard level rescoring disabled setting flag (#2352)[https://github.com/opensearch-project/k-NN/pull/2352] diff --git a/src/main/java/org/opensearch/knn/common/KNNVectorUtil.java b/src/main/java/org/opensearch/knn/common/KNNVectorUtil.java index 778cc164d..ea826c1ff 100644 --- a/src/main/java/org/opensearch/knn/common/KNNVectorUtil.java +++ b/src/main/java/org/opensearch/knn/common/KNNVectorUtil.java @@ -7,9 +7,7 @@ import lombok.AccessLevel; import lombok.NoArgsConstructor; -import org.opensearch.knn.index.vectorvalues.KNNVectorValues; -import java.io.IOException; import java.util.List; import java.util.Objects; @@ -62,17 +60,4 @@ public static int[] intListToArray(final List integerList) { } return intArray; } - - /** - * Iterates vector values once if it is not at start of the location, - * Intended to be done to make sure dimension and bytesPerVector are available - * @param vectorValues - * @throws IOException - */ - public static void iterateVectorValuesOnce(final KNNVectorValues vectorValues) throws IOException { - if (vectorValues.docId() == -1) { - vectorValues.nextDoc(); - vectorValues.getVector(); - } - } } diff --git a/src/main/java/org/opensearch/knn/index/codec/nativeindex/DefaultIndexBuildStrategy.java b/src/main/java/org/opensearch/knn/index/codec/nativeindex/DefaultIndexBuildStrategy.java index 23c3ba116..15d38a079 100644 --- a/src/main/java/org/opensearch/knn/index/codec/nativeindex/DefaultIndexBuildStrategy.java +++ b/src/main/java/org/opensearch/knn/index/codec/nativeindex/DefaultIndexBuildStrategy.java @@ -23,7 +23,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; import static org.opensearch.knn.common.KNNConstants.MODEL_ID; import static org.opensearch.knn.common.KNNVectorUtil.intListToArray; -import static org.opensearch.knn.common.KNNVectorUtil.iterateVectorValuesOnce; +import static org.opensearch.knn.index.codec.util.KNNCodecUtil.initializeVectorValues; import static org.opensearch.knn.index.codec.transfer.OffHeapVectorTransferFactory.getVectorTransfer; /** @@ -52,7 +52,7 @@ public static DefaultIndexBuildStrategy getInstance() { public void buildAndWriteIndex(final BuildIndexParams indexInfo) throws IOException { final KNNVectorValues knnVectorValues = indexInfo.getVectorValues(); // Needed to make sure we don't get 0 dimensions while initializing index - iterateVectorValuesOnce(knnVectorValues); + initializeVectorValues(knnVectorValues); IndexBuildSetup indexBuildSetup = QuantizationIndexUtils.prepareIndexBuild(knnVectorValues, indexInfo); try ( diff --git a/src/main/java/org/opensearch/knn/index/codec/nativeindex/MemOptimizedNativeIndexBuildStrategy.java b/src/main/java/org/opensearch/knn/index/codec/nativeindex/MemOptimizedNativeIndexBuildStrategy.java index 81f5915a7..2864be6d2 100644 --- a/src/main/java/org/opensearch/knn/index/codec/nativeindex/MemOptimizedNativeIndexBuildStrategy.java +++ b/src/main/java/org/opensearch/knn/index/codec/nativeindex/MemOptimizedNativeIndexBuildStrategy.java @@ -22,7 +22,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; import static org.opensearch.knn.common.KNNVectorUtil.intListToArray; -import static org.opensearch.knn.common.KNNVectorUtil.iterateVectorValuesOnce; +import static org.opensearch.knn.index.codec.util.KNNCodecUtil.initializeVectorValues; import static org.opensearch.knn.index.codec.transfer.OffHeapVectorTransferFactory.getVectorTransfer; /** @@ -53,7 +53,7 @@ public static MemOptimizedNativeIndexBuildStrategy getInstance() { public void buildAndWriteIndex(final BuildIndexParams indexInfo) throws IOException { final KNNVectorValues knnVectorValues = indexInfo.getVectorValues(); // Needed to make sure we don't get 0 dimensions while initializing index - iterateVectorValuesOnce(knnVectorValues); + initializeVectorValues(knnVectorValues); KNNEngine engine = indexInfo.getKnnEngine(); Map indexParameters = indexInfo.getParameters(); IndexBuildSetup indexBuildSetup = QuantizationIndexUtils.prepareIndexBuild(knnVectorValues, indexInfo); diff --git a/src/main/java/org/opensearch/knn/index/codec/nativeindex/NativeIndexWriter.java b/src/main/java/org/opensearch/knn/index/codec/nativeindex/NativeIndexWriter.java index 27a1ecfb6..7078645e5 100644 --- a/src/main/java/org/opensearch/knn/index/codec/nativeindex/NativeIndexWriter.java +++ b/src/main/java/org/opensearch/knn/index/codec/nativeindex/NativeIndexWriter.java @@ -43,7 +43,7 @@ import static org.opensearch.knn.common.FieldInfoExtractor.extractVectorDataType; import static org.opensearch.knn.common.KNNConstants.MODEL_ID; import static org.opensearch.knn.common.KNNConstants.PARAMETERS; -import static org.opensearch.knn.common.KNNVectorUtil.iterateVectorValuesOnce; +import static org.opensearch.knn.index.codec.util.KNNCodecUtil.initializeVectorValues; import static org.opensearch.knn.index.codec.util.KNNCodecUtil.buildEngineFileName; import static org.opensearch.knn.index.engine.faiss.Faiss.FAISS_BINARY_INDEX_DESCRIPTION_PREFIX; @@ -100,7 +100,7 @@ public static NativeIndexWriter getWriter( * @throws IOException */ public void flushIndex(final KNNVectorValues knnVectorValues, int totalLiveDocs) throws IOException { - iterateVectorValuesOnce(knnVectorValues); + initializeVectorValues(knnVectorValues); buildAndWriteIndex(knnVectorValues, totalLiveDocs); recordRefreshStats(); } @@ -111,7 +111,7 @@ public void flushIndex(final KNNVectorValues knnVectorValues, int totalLiveDo * @throws IOException */ public void mergeIndex(final KNNVectorValues knnVectorValues, int totalLiveDocs) throws IOException { - iterateVectorValuesOnce(knnVectorValues); + initializeVectorValues(knnVectorValues); if (knnVectorValues.docId() == NO_MORE_DOCS) { // This is in place so we do not add metrics log.debug("Skipping mergeIndex, vector values are already iterated for {}", fieldInfo.name); diff --git a/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java b/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java index 3ccfc3c2b..f2db1f25d 100644 --- a/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java +++ b/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java @@ -9,12 +9,15 @@ import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.search.DocIdSetIterator; import org.opensearch.knn.common.FieldInfoExtractor; import org.opensearch.knn.common.KNNConstants; import org.opensearch.knn.index.VectorDataType; import org.opensearch.knn.index.codec.KNN80Codec.KNN80BinaryDocValues; import org.opensearch.knn.index.engine.KNNEngine; +import org.opensearch.knn.index.vectorvalues.KNNVectorValues; +import java.io.IOException; import java.util.Comparator; import java.util.List; import java.util.stream.Collectors; @@ -116,6 +119,31 @@ public static String getNativeEngineFileFromFieldInfo(FieldInfo field, SegmentIn } } + /** + * Positions the vectorValuesIterator to the first vector document ID if not already positioned there. + * This initialization is crucial for setting up vector dimensions and other properties in VectorValues. + *

+ * If the VectorValues contains no vector documents, the iterator will be positioned at + * {@link DocIdSetIterator#NO_MORE_DOCS} + * + * @param vectorValues {@link KNNVectorValues} + * @throws IOException if there is an error while accessing the vector values + */ + public static void initializeVectorValues(final KNNVectorValues vectorValues) throws IOException { + // The docId will be set to -1 if next doc has never been called yet. If it has already been called, + // no need to advance the vector values + if (vectorValues.docId() != -1) { + return; + } + // Ensure that we are not getting the next vector if there are no more docs + vectorValues.nextDoc(); + if (vectorValues.docId() == DocIdSetIterator.NO_MORE_DOCS) { + // Ensure that we are not getting the vector if there are no more docs + return; + } + vectorValues.getVector(); + } + /** * Get KNNEngine From FieldInfo * diff --git a/src/test/java/org/opensearch/knn/common/KNNVectorUtilTests.java b/src/test/java/org/opensearch/knn/common/KNNVectorUtilTests.java index d64b73c9a..5712a7f3a 100644 --- a/src/test/java/org/opensearch/knn/common/KNNVectorUtilTests.java +++ b/src/test/java/org/opensearch/knn/common/KNNVectorUtilTests.java @@ -11,17 +11,10 @@ package org.opensearch.knn.common; -import lombok.SneakyThrows; import org.opensearch.knn.KNNTestCase; -import org.opensearch.knn.index.VectorDataType; -import org.opensearch.knn.index.vectorvalues.KNNVectorValues; -import org.opensearch.knn.index.vectorvalues.KNNVectorValuesFactory; -import org.opensearch.knn.index.vectorvalues.TestVectorValues; import java.util.List; -import static org.opensearch.knn.common.KNNVectorUtil.iterateVectorValuesOnce; - public class KNNVectorUtilTests extends KNNTestCase { public void testByteZeroVector() { assertTrue(KNNVectorUtil.isZeroVector(new byte[] { 0, 0, 0 })); @@ -38,23 +31,4 @@ public void testIntListToArray() { assertNull(KNNVectorUtil.intListToArray(List.of())); assertNull(KNNVectorUtil.intListToArray(null)); } - - @SneakyThrows - public void testInit() { - // Give - final List floatArray = List.of(new float[] { 1, 2 }, new float[] { 2, 3 }); - final int dimension = floatArray.get(0).length; - final TestVectorValues.PreDefinedFloatVectorValues randomVectorValues = new TestVectorValues.PreDefinedFloatVectorValues( - floatArray - ); - final KNNVectorValues knnVectorValues = KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, randomVectorValues); - - // When - iterateVectorValuesOnce(knnVectorValues); - - // Then - assertNotEquals(-1, knnVectorValues.docId()); - assertArrayEquals(floatArray.get(0), knnVectorValues.getVector(), 0.001f); - assertEquals(dimension, knnVectorValues.dimension()); - } } diff --git a/src/test/java/org/opensearch/knn/index/codec/util/KNNCodecUtilTests.java b/src/test/java/org/opensearch/knn/index/codec/util/KNNCodecUtilTests.java index 86e22cd88..b6680925e 100644 --- a/src/test/java/org/opensearch/knn/index/codec/util/KNNCodecUtilTests.java +++ b/src/test/java/org/opensearch/knn/index/codec/util/KNNCodecUtilTests.java @@ -6,16 +6,24 @@ package org.opensearch.knn.index.codec.util; import junit.framework.TestCase; +import lombok.SneakyThrows; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.search.DocIdSetIterator; +import org.junit.Assert; import org.opensearch.knn.index.VectorDataType; import org.opensearch.knn.index.engine.KNNEngine; +import org.opensearch.knn.index.vectorvalues.KNNVectorValues; +import org.opensearch.knn.index.vectorvalues.KNNVectorValuesFactory; +import org.opensearch.knn.index.vectorvalues.TestVectorValues; +import java.util.Collections; import java.util.List; import java.util.Set; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import static org.opensearch.knn.index.codec.util.KNNCodecUtil.calculateArraySize; +import static org.opensearch.knn.index.codec.util.KNNCodecUtil.initializeVectorValues; public class KNNCodecUtilTests extends TestCase { @@ -46,4 +54,38 @@ public void testGetKNNEngines() { assertEquals(engineFiles.size(), 2); assertTrue(engineFiles.get(0).equals("_0_2011_target_field.faissc")); } + + @SneakyThrows + public void testInitializeVectorValues_whenValidVectorValues_thenSuccess() { + // Give + final List floatArray = List.of(new float[] { 1, 2 }, new float[] { 2, 3 }); + final int dimension = floatArray.get(0).length; + final TestVectorValues.PreDefinedFloatVectorValues randomVectorValues = new TestVectorValues.PreDefinedFloatVectorValues( + floatArray + ); + final KNNVectorValues knnVectorValues = KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, randomVectorValues); + + // When + initializeVectorValues(knnVectorValues); + + // Then + Assert.assertNotEquals(-1, knnVectorValues.docId()); + Assert.assertArrayEquals(floatArray.get(0), knnVectorValues.getVector(), 0.001f); + assertEquals(dimension, knnVectorValues.dimension()); + } + + @SneakyThrows + public void testInitializeVectorValues_whenNoDocs_thenSuccess() { + // Give + final List floatArray = Collections.emptyList(); + final TestVectorValues.PreDefinedFloatVectorValues randomVectorValues = new TestVectorValues.PreDefinedFloatVectorValues( + floatArray + ); + final KNNVectorValues knnVectorValues = KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, randomVectorValues); + + // When + initializeVectorValues(knnVectorValues); + // Then + Assert.assertEquals(DocIdSetIterator.NO_MORE_DOCS, knnVectorValues.docId()); + } }