Address Feedback2

apache · Mar 19, 2020 · d732d7e · d732d7e
1 parent 6384b15
commit d732d7e
Show file tree

Hide file tree

Showing 4 changed files with 80 additions and 66 deletions.
diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
@@ -934,10 +934,6 @@ public void setScorer(Scorable scorer) {}
    */
   public static abstract class IteratorSupplierComparator<T> extends FieldComparator<T> implements LeafFieldComparator {
     abstract DocIdSetIterator iterator();
-
-    // This method is called from TopFieldCollector when already enough top hits have been collected.
-    // This method should be called every time the bottom entry is updated, and it informs the comparator
-    // to update its iterator to include possibly only docs that are "stronger" than the current bottom entry
-    abstract void updateIterator() throws IOException;
+    abstract void setHitsThresholdChecker(HitsThresholdChecker hitsThresholdChecker);
   }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/LongDocValuesPointComparator.java b/lucene/core/src/java/org/apache/lucene/search/LongDocValuesPointComparator.java
@@ -31,6 +31,7 @@
 
 public class LongDocValuesPointComparator extends IteratorSupplierComparator<Long> {
     private final String field;
+    private final int numHits;
     private final boolean reverse;
     private final long missingValue;
     private final long[] values;
@@ -40,17 +41,24 @@ public class LongDocValuesPointComparator extends IteratorSupplierComparator<Lon
     protected NumericDocValues docValues;
     private DocIdSetIterator iterator;
     private PointValues pointValues;
+    private HitsThresholdChecker hitsThresholdChecker = null;
     private int maxDoc;
     private int maxDocVisited;
     private int updateCounter = 0;
 
     public LongDocValuesPointComparator(String field, int numHits, boolean reverse, Long missingValue) {
         this.field = field;
+        this.numHits = numHits;
         this.reverse = reverse;
         this.missingValue = missingValue != null ? missingValue : 0L;
         this.values = new long[numHits];
     }
 
+    @Override
+    void setHitsThresholdChecker(HitsThresholdChecker hitsThresholdChecker) {
+        this.hitsThresholdChecker = hitsThresholdChecker;
+    }
+
     private long getValueForDoc(int doc) throws IOException {
         if (docValues.advanceExact(doc)) {
             return docValues.longValue();
@@ -86,8 +94,13 @@ public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws I
     }
 
     @Override
-    public void setBottom(int slot) {
+    public void setBottom(int slot) throws IOException {
         this.bottom = values[slot];
+        // can't use hitsThresholdChecker.isThresholdReached() as it uses > numHits,
+        // while we want to update iterator as soon as threshold reaches numHits
+        if (hitsThresholdChecker != null && (hitsThresholdChecker.getHitsThreshold() >= numHits)) {
+            updateIterator();
+        }
     }
 
     @Override
@@ -113,7 +126,8 @@ public DocIdSetIterator iterator() {
         return iterator;
     }
 
-    public void updateIterator() throws IOException {
+    // update its iterator to include possibly only docs that are "stronger" than the current bottom entry
+    private void updateIterator() throws IOException {
         updateCounter++;
         if (updateCounter > 256 && (updateCounter & 0x1f) != 0x1f) { // Start sampling if we get called too much
             return;

diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
@@ -144,8 +144,29 @@ public void setScorer(Scorable scorer) throws IOException {
 
     @Override
     public DocIdSetIterator iterator() {
-      if (itComparator == null) return null;
-      return itComparator.iterator();
+      if ((itComparator == null) || (itComparator.iterator() == null)) return null;
+      return new DocIdSetIterator() {
+        private int doc;
+        @Override
+        public int nextDoc() throws IOException {
+          return doc = itComparator.iterator().nextDoc();
+        }
+
+        @Override
+        public int docID() {
+          return doc;
+        }
+
+        @Override
+        public long cost() {
+          return itComparator.iterator().cost();
+        }
+
+        @Override
+        public int advance(int target) throws IOException {
+          return doc = itComparator.iterator().advance(target);
+        }
+      };
     }
   }
 
@@ -285,7 +306,6 @@ public void collect(int doc) throws IOException {
   final boolean canSetMinScore;
 
   final FieldComparator.IteratorSupplierComparator itComparator;
-  final boolean canUpdateIterator;
 
   // an accumulator that maintains the maximum of the segment's minimum competitive scores
   final MaxScoreAccumulator minScoreAcc;
@@ -330,11 +350,10 @@ private TopFieldCollector(FieldValueHitQueue<Entry> pq, int numHits,
     if ((fieldComparator instanceof FieldComparator.IteratorSupplierComparator) &&
             (hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE)) {
       scoreMode = ScoreMode.TOP_DOCS; // TODO: may be add another scoreMode TOP_DOCS with scores
-      itComparator = (FieldComparator.IteratorSupplierComparator) fieldComparator;
-      canUpdateIterator = true;
+      itComparator = ((FieldComparator.IteratorSupplierComparator) fieldComparator);
+      itComparator.setHitsThresholdChecker(hitsThresholdChecker);
     } else {
       itComparator = null;
-      canUpdateIterator = false;
     }
   }
 
@@ -373,10 +392,7 @@ protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
           minScoreAcc.accumulate(bottom.doc, minScore);
         }
       }
-    }
-
-    if (canUpdateIterator) {
-      itComparator.updateIterator();
+    } else if (scoreMode == ScoreMode.TOP_DOCS) {
       totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
     }
   }

diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java
@@ -23,6 +23,7 @@
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.util.Bits;
+import java.util.Arrays;
 
 /**
  * Expert: Calculate query weights and build query scorers.
@@ -202,78 +203,65 @@ public long cost() {
     public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
       collector.setScorer(scorer);
       DocIdSetIterator scorerIterator = twoPhase == null? iterator: twoPhase.approximation();
-      DocIdSetIterator combinedIterator = collector.iterator() == null ? scorerIterator: combineScorerAndCollectorIterators(scorerIterator, collector);
+      DocIdSetIterator collectorIterator = collector.iterator();
+      DocIdSetIterator combinedIterator = collectorIterator == null ? scorerIterator :
+              ConjunctionDISI.intersectIterators(Arrays.asList(scorerIterator, collectorIterator));
       if (scorer.docID() == -1 && min == 0 && max == DocIdSetIterator.NO_MORE_DOCS) {
         scoreAll(collector, combinedIterator, twoPhase, acceptDocs);
         return DocIdSetIterator.NO_MORE_DOCS;
       } else {
         int doc = scorer.docID();
-        if (doc < min) scorerIterator.advance(min);
+        if (doc < min) {
+          scorerIterator.advance(min);
+        }
         return scoreRange(collector, combinedIterator, twoPhase, acceptDocs, doc, max);
       }
     }
 
-    // conjunction iterator between scorer's iterator and collector's iterator
-    static private DocIdSetIterator combineScorerAndCollectorIterators(DocIdSetIterator scorerIterator, LeafCollector collector) {
-      return new DocIdSetIterator() {
-        int doc = -1;
-        @Override
-        public int docID() {
-          return doc;
-        }
-
-        @Override
-        public int nextDoc() throws IOException {
-          return advance(doc + 1);
-        }
-
-        @Override
-        public int advance(int target) throws IOException {
-          int doc1 = scorerIterator.advance(target);
-          int doc2 = collector.iterator().advance(target);
-          while((doc1 != DocIdSetIterator.NO_MORE_DOCS) && (doc2 != DocIdSetIterator.NO_MORE_DOCS)) {
-            if (doc1 == doc2) {
-              doc = doc1;
-              return doc1;
-            } else if (doc1 > doc2) {
-              doc2 = collector.iterator().advance(doc1);
-            } else {
-              doc1 = scorerIterator.advance(doc2);
-            }
-          }
-          return DocIdSetIterator.NO_MORE_DOCS;
-        }
-
-        @Override
-        public long cost() {
-          return scorerIterator.cost();
-        }
-      };
-    }
-
     /** Specialized method to bulk-score a range of hits; we
      *  separate this from {@link #scoreAll} to help out
      *  hotspot.
      *  See <a href="https://issues.apache.org/jira/browse/LUCENE-5487">LUCENE-5487</a> */
     static int scoreRange(LeafCollector collector, DocIdSetIterator iterator, TwoPhaseIterator twoPhase,
         Bits acceptDocs, int currentDoc, int end) throws IOException {
-      while (currentDoc < end) {
-        if ((acceptDocs == null || acceptDocs.get(currentDoc)) && (twoPhase == null || twoPhase.matches())) {
-          collector.collect(currentDoc);
+      if (twoPhase == null) {
+        while (currentDoc < end) {
+          if (acceptDocs == null || acceptDocs.get(currentDoc)) {
+            collector.collect(currentDoc);
+          }
+          currentDoc = iterator.nextDoc();
+        }
+        return currentDoc;
+      } else {
+        final DocIdSetIterator approximation = twoPhase.approximation();
+        while (currentDoc < end) {
+          if ((acceptDocs == null || acceptDocs.get(currentDoc)) && twoPhase.matches()) {
+            collector.collect(currentDoc);
+          }
+          currentDoc = approximation.nextDoc();
         }
-        currentDoc = iterator.nextDoc();
+        return currentDoc;
       }
-      return currentDoc;
     }
-    
+
     /** Specialized method to bulk-score all hits; we
      *  separate this from {@link #scoreRange} to help out
      *  hotspot.
      *  See <a href="https://issues.apache.org/jira/browse/LUCENE-5487">LUCENE-5487</a> */
     static void scoreAll(LeafCollector collector, DocIdSetIterator iterator, TwoPhaseIterator twoPhase, Bits acceptDocs) throws IOException {
-      for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
-        if ((acceptDocs == null || acceptDocs.get(doc)) && (twoPhase == null || twoPhase.matches())) {
-          collector.collect(doc);
+      if (twoPhase == null) {
+        for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
+          if (acceptDocs == null || acceptDocs.get(doc)) {
+            collector.collect(doc);
+          }
+        }
+      } else {
+        // The scorer has an approximation, so run the approximation first, then check acceptDocs, then confirm
+        final DocIdSetIterator approximation = twoPhase.approximation();
+        for (int doc = approximation.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = approximation.nextDoc()) {
+          if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
+            collector.collect(doc);
+          }
         }
       }
     }