From fcc384fdafcbd7fb86bb9a2dff566515895abcff Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Fri, 18 Feb 2022 11:36:22 -0800 Subject: [PATCH] LUCENE-10408: Fix vector values iteration bug (#690) Now that there is special logic to handle the dense case, we need to adjust some assertions in VectorValues#advance. --- .../lucene91/Lucene91HnswVectorsReader.java | 7 ++-- .../search/TestKnnVectorFieldExistsQuery.java | 35 +++++++++++++++++++ .../lucene/search/TestKnnVectorQuery.java | 1 - 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsReader.java index d6f4c04cc7ac..014414882e76 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsReader.java @@ -475,11 +475,10 @@ public int advance(int target) { } } - assert ord <= size; - if (ord == size) { - doc = NO_MORE_DOCS; - } else { + if (ord < size) { doc = ordToDocOperator.applyAsInt(ord); + } else { + doc = NO_MORE_DOCS; } return doc; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorFieldExistsQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorFieldExistsQuery.java index cdfae67b3451..3444ecab1193 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorFieldExistsQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorFieldExistsQuery.java @@ -23,6 +23,7 @@ import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.util.LuceneTestCase; @@ -98,6 +99,40 @@ public void testAllDocsHaveField() throws IOException { } } + public void testConjunction() throws IOException { + try (Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) { + int numDocs = atLeast(100); + int numVectors = 0; + + boolean allDocsHaveVector = random().nextBoolean(); + for (int i = 0; i < numDocs; ++i) { + Document doc = new Document(); + if (allDocsHaveVector || random().nextBoolean()) { + doc.add(new KnnVectorField("vector", randomVector(5))); + numVectors++; + } + doc.add(new StringField("field", "value" + (i % 2), Store.NO)); + iw.addDocument(doc); + } + try (IndexReader reader = iw.getReader()) { + IndexSearcher searcher = newSearcher(reader); + Occur occur = random().nextBoolean() ? Occur.MUST : Occur.FILTER; + BooleanQuery booleanQuery = + new BooleanQuery.Builder() + .add(new TermQuery(new Term("field", "value1")), occur) + .add(new KnnVectorFieldExistsQuery("vector"), Occur.FILTER) + .build(); + + int count = searcher.count(booleanQuery); + assertTrue(count <= numVectors); + if (allDocsHaveVector) { + assertEquals(numDocs / 2, count); + } + } + } + } + public void testFieldExistsButNoDocsHaveField() throws IOException { try (Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java index aa576b8d2a14..c0d327aa6ea3 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java @@ -486,7 +486,6 @@ public void testRandom() throws IOException { } /** Tests with random vectors and a random filter. Uses RandomIndexWriter. */ - @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-10408") public void testRandomWithFilter() throws IOException { int numDocs = 200; int dimension = atLeast(5);