apache · mayya-sharipova · Jun 23, 2020 · Mar 13, 2020 · Mar 18, 2020 · Mar 19, 2020
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
@@ -115,7 +115,7 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
       return new ConstantScoreWeight(this, boost) {
         @Override
         public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
-          if (scoreMode == ScoreMode.TOP_SCORES) {
+          if (scoreMode == ScoreMode.TOP_SCORES || scoreMode == ScoreMode.TOP_DOCS) {
             return super.bulkScorer(context);
           }
           final BulkScorer innerScorer = innerWeight.bulkScorer(context);

diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
@@ -928,4 +928,16 @@ public int compareTop(int doc) throws IOException {
     @Override
     public void setScorer(Scorable scorer) {}
   }
+
+  /**
+   * A field comparator that can provide an iterator over competitive documents.
+   */
+  public static abstract class IteratorSupplierComparator<T> extends FieldComparator<T> implements LeafFieldComparator {
+    abstract DocIdSetIterator iterator(); // iterator over competitive documents; package-private, like updateIterator()
+
+    // This method is called from TopFieldCollector once enough top hits have been collected.
+    // It should be called every time the bottom entry is updated; it tells the comparator to update
+    // its iterator so that it possibly includes only docs that are "stronger" than the current bottom entry.
+    abstract void updateIterator() throws IOException;
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterLeafCollector.java b/lucene/core/src/java/org/apache/lucene/search/FilterLeafCollector.java
@@ -53,4 +53,8 @@ public String toString() {
     return name + "(" + in + ")";
   }
 
+  @Override
+  public DocIdSetIterator iterator() {
+    return in.iterator(); // delegate to "in", so this filter exposes whatever iterator "in" provides
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java b/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java
@@ -93,4 +93,11 @@ public interface LeafCollector {
    */
   void collect(int doc) throws IOException;
 
+  /**
+   * Optionally returns an iterator over competitive documents.
+   * This default implementation returns {@code null}.
+   */
+  default DocIdSetIterator iterator() {
+    return null;
+  }
+
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/LongDocValuesPointComparator.java b/lucene/core/src/java/org/apache/lucene/search/LongDocValuesPointComparator.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.util.DocIdSetBuilder;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import static org.apache.lucene.search.FieldComparator.IteratorSupplierComparator;
+
+/**
+ * Comparator for a long field that is indexed with both numeric doc values and points. Comparisons read
+ * doc values; {@link #updateIterator()} uses points to narrow {@link #iterator()} to competitive docs.
+ */
+public class LongDocValuesPointComparator extends IteratorSupplierComparator<Long> {
+    private final String field;
+    private final boolean reverse;
+    private final long missingValue; // stands in for docs that have no doc value
+    private final long[] values; // one slot per collected hit
+    private long bottom;
+    private long topValue;
+    boolean hasTopValue = false; // indicates that topValue for searchAfter is set
+    protected NumericDocValues docValues;
+    private DocIdSetIterator iterator;
+    private PointValues pointValues; // null when the segment has no points for the field
+    private int maxDoc;
+    private int maxDocVisited; // last doc passed to copy() in the current segment, -1 if none yet
+    private int updateCounter = 0;
+
+    /** Creates a comparator with {@code numHits} value slots; a {@code null} missingValue is treated as 0. */
+    public LongDocValuesPointComparator(String field, int numHits, boolean reverse, Long missingValue) {
+        this.field = field;
+        this.reverse = reverse;
+        this.missingValue = missingValue != null ? missingValue : 0L;
+        this.values = new long[numHits];
+    }
+
+    /** Returns the doc value of {@code doc}, or the missing value when the doc has none. */
+    private long getValueForDoc(int doc) throws IOException {
+        return docValues.advanceExact(doc) ? docValues.longValue() : missingValue;
+    }
+
+    @Override
+    public int compare(int slot1, int slot2) {
+        return Long.compare(values[slot1], values[slot2]);
+    }
+
+    @Override
+    public void setTopValue(Long value) {
+        topValue = value;
+        hasTopValue = true;
+    }
+
+    @Override
+    public Long value(int slot) {
+        return Long.valueOf(values[slot]);
+    }
+
+    /** Binds this comparator to a segment; the iterator starts out as the segment's numeric doc values. */
+    @Override
+    public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
+        docValues = DocValues.getNumeric(context.reader(), field);
+        iterator = docValues;
+        pointValues = context.reader().getPointValues(field);
+        maxDoc = context.reader().maxDoc();
+        maxDocVisited = -1; // nothing visited yet; starting at 0 would treat doc 0 as already seen
+        return this;
+    }
+
+    @Override
+    public void setBottom(int slot) throws IOException {
+        this.bottom = values[slot];
+    }
+
+    @Override
+    public int compareBottom(int doc) throws IOException {
+        return Long.compare(bottom, getValueForDoc(doc));
+    }
+
+    @Override
+    public int compareTop(int doc) throws IOException {
+        return Long.compare(topValue, getValueForDoc(doc));
+    }
+
+    @Override
+    public void copy(int slot, int doc) throws IOException {
+        maxDocVisited = doc;
+        values[slot] = getValueForDoc(doc);
+    }
+
+    @Override
+    public void setScorer(Scorable scorer) throws IOException {}
+
+    @Override
+    public DocIdSetIterator iterator() {
+        return iterator;
+    }
+
+    /**
+     * Rebuilds the iterator so that it possibly includes only docs that are "stronger" than the current
+     * bottom entry. The current iterator is kept when the segment has no points for the field, when the
+     * call is sampled out, or when the new range would not cut the number of docs at least 8x.
+     * NOTE(review): docs without a point are never added, even if missingValue is competitive -- confirm.
+     */
+    @Override
+    public void updateIterator() throws IOException {
+        updateCounter++;
+        // Keep the iterator when there are no points to consult, or sample once we get called too much
+        if (pointValues == null || (updateCounter > 256 && (updateCounter & 0x1f) != 0x1f)) {
+            return;
+        }
+        // Inclusive bounds of the competitive range, encoded as points; a null bound leaves that side open
+        final byte[] bottomBytes = new byte[Long.BYTES];
+        LongPoint.encodeDimension(bottom, bottomBytes, 0);
+        byte[] topBytes = null;
+        if (hasTopValue) {
+            topBytes = new byte[Long.BYTES];
+            LongPoint.encodeDimension(topValue, topBytes, 0);
+        }
+        final byte[] maxValueAsBytes = reverse ? topBytes : bottomBytes;
+        final byte[] minValueAsBytes = reverse ? bottomBytes : topBytes;
+
+        DocIdSetBuilder result = new DocIdSetBuilder(maxDoc);
+        PointValues.IntersectVisitor visitor = new PointValues.IntersectVisitor() {
+            DocIdSetBuilder.BulkAdder adder;
+            @Override
+            public void grow(int count) {
+                adder = result.grow(count);
+            }
+
+            @Override
+            public void visit(int docID) {
+                if (docID <= maxDocVisited) {
+                    return; // Already visited or skipped
+                }
+                adder.add(docID);
+            }
+
+            @Override
+            public void visit(int docID, byte[] packedValue) {
+                if (docID <= maxDocVisited || aboveMax(packedValue) || belowMin(packedValue)) {
+                    return; // Already visited or skipped, or the doc's value is too high or too low
+                }
+                adder.add(docID); // doc is competitive
+            }
+
+            @Override
+            public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+                if (aboveMax(minPackedValue) || belowMin(maxPackedValue)) {
+                    return PointValues.Relation.CELL_OUTSIDE_QUERY;
+                }
+                if (aboveMax(maxPackedValue) || belowMin(minPackedValue)) {
+                    return PointValues.Relation.CELL_CROSSES_QUERY;
+                }
+                return PointValues.Relation.CELL_INSIDE_QUERY;
+            }
+
+            private boolean aboveMax(byte[] packed) { // true when an upper bound exists and packed exceeds it
+                return maxValueAsBytes != null && Arrays.compareUnsigned(packed, 0, Long.BYTES, maxValueAsBytes, 0, Long.BYTES) > 0;
+            }
+            private boolean belowMin(byte[] packed) { // true when a lower bound exists and packed is under it
+                return minValueAsBytes != null && Arrays.compareUnsigned(packed, 0, Long.BYTES, minValueAsBytes, 0, Long.BYTES) < 0;
+            }
+        };
+        final long threshold = iterator.cost() >>> 3;
+        long estimatedNumberOfMatches = pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
+        if (estimatedNumberOfMatches >= threshold) {
+            // the new range is not selective enough to be worth materializing, it doesn't reduce number of docs at least 8x
+            return;
+        }
+        pointValues.intersect(visitor);
+        this.iterator = result.build().iterator();
+    }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/LongDocValuesPointSortField.java b/lucene/core/src/java/org/apache/lucene/search/LongDocValuesPointSortField.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+/**
+ * Sort field for long values indexed both with doc values and points.
+ * Use this field if you want to skip collecting non-competitive documents,
+ * which in some cases can significantly speed up sort queries.
+ */
+public class LongDocValuesPointSortField extends SortField {
+
+    public LongDocValuesPointSortField(String field) { // public so the field can be created outside this package
+        super(field, SortField.Type.CUSTOM);
+    }
+
+    public LongDocValuesPointSortField(String field, boolean reverse) {
+        super(field, SortField.Type.CUSTOM, reverse);
+    }
+
+    @Override
+    public FieldComparator<?> getComparator(int numHits, int sortPos) {
+        if (sortPos == 0) { // leading sort: use the comparator that can supply an iterator over competitive docs
+            return new LongDocValuesPointComparator(getField(), numHits, reverse, (Long) missingValue);
+        } else { // any other sort position: plain long comparator
+            return new FieldComparator.LongComparator(numHits, getField(), (Long) missingValue);
+        }
+    }
+
+    @Override
+    public void setMissingValue(Object missingValue) { // accepts null or a java.lang.Long, rejects anything else
+        if (missingValue != null && missingValue.getClass() != Long.class)
+            throw new IllegalArgumentException("Missing values for Type.LONG can only be of type java.lang.Long, but got " + missingValue.getClass());
+        this.missingValue = missingValue;
+    }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
@@ -46,7 +46,7 @@ public boolean isCacheable(LeafReaderContext ctx) {
 
       @Override
       public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
-        if (scoreMode == ScoreMode.TOP_SCORES) {
+        if ((scoreMode == ScoreMode.TOP_SCORES) || (scoreMode == ScoreMode.TOP_DOCS)) {
           return super.bulkScorer(context);
         }
         final float score = score();

diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java b/lucene/core/src/java/org/apache/lucene/search/ScoreMode.java
@@ -51,6 +51,17 @@ public boolean needsScores() {
     public boolean needsScores() {
       return true;
     }
+  },
+
+  /**
+   * ScoreMode for collectors that can provide their own iterators,
+   * which optionally allows skipping non-competitive docs. Scores are not needed.
+   */
+  TOP_DOCS {
+    @Override
+    public boolean needsScores() {
+      return false;
+    }
   };
 
   /**