From d732d7eb9de67a597f67e91c9774104aa055e293 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Thu, 19 Mar 2020 15:51:39 -0400 Subject: [PATCH] Address Feedback2 --- .../apache/lucene/search/FieldComparator.java | 6 +- .../search/LongDocValuesPointComparator.java | 18 +++- .../lucene/search/TopFieldCollector.java | 36 +++++--- .../java/org/apache/lucene/search/Weight.java | 86 ++++++++----------- 4 files changed, 80 insertions(+), 66 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java index d8bed8897665..1a5df0b84c8f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java @@ -934,10 +934,6 @@ public void setScorer(Scorable scorer) {} */ public static abstract class IteratorSupplierComparator extends FieldComparator implements LeafFieldComparator { abstract DocIdSetIterator iterator(); - - // This method is called from TopFieldCollector when already enough top hits have been collected. - // This method should be called every time the bottom entry is updated, and it informs the comparator - // to update its iterator to include possibly only docs that are "stronger" than the current bottom entry - abstract void updateIterator() throws IOException; + abstract void setHitsThresholdChecker(HitsThresholdChecker hitsThresholdChecker); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/LongDocValuesPointComparator.java b/lucene/core/src/java/org/apache/lucene/search/LongDocValuesPointComparator.java index 979d8b74a355..dcfb6c4f66cf 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LongDocValuesPointComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/LongDocValuesPointComparator.java @@ -31,6 +31,7 @@ public class LongDocValuesPointComparator extends IteratorSupplierComparator { private final String field; + private final int numHits; private final boolean reverse; private final long missingValue; private final long[] values; @@ -40,17 +41,24 @@ public class LongDocValuesPointComparator extends IteratorSupplierComparator numHits, + // while we want to update iterator as soon as threshold reaches numHits + if (hitsThresholdChecker != null && (hitsThresholdChecker.getHitsThreshold() >= numHits)) { + updateIterator(); + } } @Override @@ -113,7 +126,8 @@ public DocIdSetIterator iterator() { return iterator; } - public void updateIterator() throws IOException { + // update its iterator to include possibly only docs that are "stronger" than the current bottom entry + private void updateIterator() throws IOException { updateCounter++; if (updateCounter > 256 && (updateCounter & 0x1f) != 0x1f) { // Start sampling if we get called too much return; diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java index b09db04551ed..a3e6457ab763 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java @@ -144,8 +144,29 @@ public void setScorer(Scorable scorer) throws IOException { @Override public DocIdSetIterator iterator() { - if (itComparator == null) return null; - return itComparator.iterator(); + if ((itComparator == null) || (itComparator.iterator() == null)) return null; + return new DocIdSetIterator() { + private int doc; + @Override + public int nextDoc() throws IOException { + return doc = itComparator.iterator().nextDoc(); + } + + @Override + public int docID() { + return doc; + } + + @Override + public long cost() { + return itComparator.iterator().cost(); + } + + @Override + public int advance(int target) throws IOException { + return doc = itComparator.iterator().advance(target); + } + }; } } @@ -285,7 +306,6 @@ public void collect(int doc) throws IOException { final boolean canSetMinScore; final FieldComparator.IteratorSupplierComparator itComparator; - final boolean canUpdateIterator; // an accumulator that maintains the maximum of the segment's minimum competitive scores final MaxScoreAccumulator minScoreAcc; @@ -330,11 +350,10 @@ private TopFieldCollector(FieldValueHitQueue pq, int numHits, if ((fieldComparator instanceof FieldComparator.IteratorSupplierComparator) && (hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE)) { scoreMode = ScoreMode.TOP_DOCS; // TODO: may be add another scoreMode TOP_DOCS with scores - itComparator = (FieldComparator.IteratorSupplierComparator) fieldComparator; - canUpdateIterator = true; + itComparator = ((FieldComparator.IteratorSupplierComparator) fieldComparator); + itComparator.setHitsThresholdChecker(hitsThresholdChecker); } else { itComparator = null; - canUpdateIterator = false; } } @@ -373,10 +392,7 @@ protected void updateMinCompetitiveScore(Scorable scorer) throws IOException { minScoreAcc.accumulate(bottom.doc, minScore); } } - } - - if (canUpdateIterator) { - itComparator.updateIterator(); + } else if (scoreMode == ScoreMode.TOP_DOCS) { totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index c08328ea1018..2fbe25600e7c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -23,6 +23,7 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.util.Bits; +import java.util.Arrays; /** * Expert: Calculate query weights and build query scorers. @@ -202,78 +203,65 @@ public long cost() { public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException { collector.setScorer(scorer); DocIdSetIterator scorerIterator = twoPhase == null? iterator: twoPhase.approximation(); - DocIdSetIterator combinedIterator = collector.iterator() == null ? scorerIterator: combineScorerAndCollectorIterators(scorerIterator, collector); + DocIdSetIterator collectorIterator = collector.iterator(); + DocIdSetIterator combinedIterator = collectorIterator == null ? scorerIterator : + ConjunctionDISI.intersectIterators(Arrays.asList(scorerIterator, collectorIterator)); if (scorer.docID() == -1 && min == 0 && max == DocIdSetIterator.NO_MORE_DOCS) { scoreAll(collector, combinedIterator, twoPhase, acceptDocs); return DocIdSetIterator.NO_MORE_DOCS; } else { int doc = scorer.docID(); - if (doc < min) scorerIterator.advance(min); + if (doc < min) { + scorerIterator.advance(min); + } return scoreRange(collector, combinedIterator, twoPhase, acceptDocs, doc, max); } } - // conjunction iterator between scorer's iterator and collector's iterator - static private DocIdSetIterator combineScorerAndCollectorIterators(DocIdSetIterator scorerIterator, LeafCollector collector) { - return new DocIdSetIterator() { - int doc = -1; - @Override - public int docID() { - return doc; - } - - @Override - public int nextDoc() throws IOException { - return advance(doc + 1); - } - - @Override - public int advance(int target) throws IOException { - int doc1 = scorerIterator.advance(target); - int doc2 = collector.iterator().advance(target); - while((doc1 != DocIdSetIterator.NO_MORE_DOCS) && (doc2 != DocIdSetIterator.NO_MORE_DOCS)) { - if (doc1 == doc2) { - doc = doc1; - return doc1; - } else if (doc1 > doc2) { - doc2 = collector.iterator().advance(doc1); - } else { - doc1 = scorerIterator.advance(doc2); - } - } - return DocIdSetIterator.NO_MORE_DOCS; - } - - @Override - public long cost() { - return scorerIterator.cost(); - } - }; - } - /** Specialized method to bulk-score a range of hits; we * separate this from {@link #scoreAll} to help out * hotspot. * See LUCENE-5487 */ static int scoreRange(LeafCollector collector, DocIdSetIterator iterator, TwoPhaseIterator twoPhase, Bits acceptDocs, int currentDoc, int end) throws IOException { - while (currentDoc < end) { - if ((acceptDocs == null || acceptDocs.get(currentDoc)) && (twoPhase == null || twoPhase.matches())) { - collector.collect(currentDoc); + if (twoPhase == null) { + while (currentDoc < end) { + if (acceptDocs == null || acceptDocs.get(currentDoc)) { + collector.collect(currentDoc); + } + currentDoc = iterator.nextDoc(); + } + return currentDoc; + } else { + final DocIdSetIterator approximation = twoPhase.approximation(); + while (currentDoc < end) { + if ((acceptDocs == null || acceptDocs.get(currentDoc)) && twoPhase.matches()) { + collector.collect(currentDoc); + } + currentDoc = approximation.nextDoc(); } - currentDoc = iterator.nextDoc(); + return currentDoc; } - return currentDoc; } - + /** Specialized method to bulk-score all hits; we * separate this from {@link #scoreRange} to help out * hotspot. * See LUCENE-5487 */ static void scoreAll(LeafCollector collector, DocIdSetIterator iterator, TwoPhaseIterator twoPhase, Bits acceptDocs) throws IOException { - for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) { - if ((acceptDocs == null || acceptDocs.get(doc)) && (twoPhase == null || twoPhase.matches())) { - collector.collect(doc); + if (twoPhase == null) { + for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) { + if (acceptDocs == null || acceptDocs.get(doc)) { + collector.collect(doc); + } + } + } else { + // The scorer has an approximation, so run the approximation first, then check acceptDocs, then confirm + final DocIdSetIterator approximation = twoPhase.approximation(); + for (int doc = approximation.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = approximation.nextDoc()) { + if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) { + collector.collect(doc); + } } } }