Skip to content

Commit

Permalink
LUCENE-9384: Backport for field sort optimization (apache#1610)
Browse files Browse the repository at this point in the history
Backport for: LUCENE-9280: Collectors to skip noncompetitive documents (apache#1351)

Similar to how scorers can update their iterators to skip non-competitive
documents, collectors and comparators should also provide and update
iterators that allow them to skip non-competitive documents.

To enable sort optimization for numeric sort fields,
the following needs to be done:
1) the field should be indexed with both doc_values and points, which
must have the same field name and the same data
2) SortField#setCanUsePoints must be set
3) totalHitsThreshold should not be set to max value.
  • Loading branch information
mayya-sharipova authored and gus-asf committed Sep 4, 2020
1 parent 648fd90 commit d06fb96
Show file tree
Hide file tree
Showing 16 changed files with 1,032 additions and 64 deletions.
8 changes: 8 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,14 @@ Optimizations

* LUCENE-9148: Points now write their index in a separate file. (Adrien Grand)

* LUCENE-9280: Add an ability for field comparators to skip non-competitive documents.
Creating a TopFieldCollector with totalHitsThreshold less than Integer.MAX_VALUE
instructs Lucene to skip non-competitive documents whenever possible. For numeric
sort fields the skipping functionality works when the same field is indexed both
with doc values and points. To indicate that the same data is stored in both the points
and the doc values, the SortField#setCanUsePoints method should be used.
(Mayya Sharipova, Jim Ferenczi, Adrien Grand)

Bug Fixes
---------------------
* LUCENE-9259: Fix wrong NGramFilterFactory argument name for preserveOriginal option (Paul Pazderski)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
return new ConstantScoreWeight(this, boost) {
@Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
if (scoreMode == ScoreMode.TOP_SCORES) {
if (scoreMode.isExhaustive() == false) {
return super.bulkScorer(context);
}
final BulkScorer innerScorer = innerWeight.bulkScorer(context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,8 @@ protected NumericDocValues getNumericDocValues(LeafReaderContext context, String
* org.apache.lucene.index.LeafReader#getNumericDocValues} and sorts by ascending value */
public static class DoubleComparator extends NumericComparator<Double> {
private final double[] values;
private double bottom;
private double topValue;
protected double bottom;
protected double topValue;

/**
* Creates a new comparator based on {@link Double#compare} for {@code numHits}.
Expand Down Expand Up @@ -225,8 +225,8 @@ public int compareTop(int doc) throws IOException {
* org.apache.lucene.index.LeafReader#getNumericDocValues(String)} and sorts by ascending value */
public static class FloatComparator extends NumericComparator<Float> {
private final float[] values;
private float bottom;
private float topValue;
protected float bottom;
protected float topValue;

/**
* Creates a new comparator based on {@link Float#compare} for {@code numHits}.
Expand Down Expand Up @@ -285,8 +285,8 @@ public int compareTop(int doc) throws IOException {
* org.apache.lucene.index.LeafReader#getNumericDocValues(String)} and sorts by ascending value */
public static class IntComparator extends NumericComparator<Integer> {
private final int[] values;
private int bottom; // Value of bottom of queue
private int topValue;
protected int bottom; // Value of bottom of queue
protected int topValue;

/**
* Creates a new comparator based on {@link Integer#compare} for {@code numHits}.
Expand Down Expand Up @@ -347,8 +347,8 @@ public int compareTop(int doc) throws IOException {
* org.apache.lucene.index.LeafReader#getNumericDocValues(String)} and sorts by ascending value */
public static class LongComparator extends NumericComparator<Long> {
private final long[] values;
private long bottom;
private long topValue;
protected long bottom;
protected long topValue;

/**
* Creates a new comparator based on {@link Long#compare} for {@code numHits}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ private static final class OneComparatorFieldValueHitQueue<T extends FieldValueH
private final int oneReverseMul;
private final FieldComparator<?> oneComparator;

public OneComparatorFieldValueHitQueue(SortField[] fields, int size) {
super(fields, size);
public OneComparatorFieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs) {
super(fields, size, filterNonCompetitiveDocs);

assert fields.length == 1;
oneComparator = comparators[0];
Expand Down Expand Up @@ -95,8 +95,8 @@ protected boolean lessThan(final Entry hitA, final Entry hitB) {
*/
private static final class MultiComparatorsFieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends FieldValueHitQueue<T> {

public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size) {
super(fields, size);
public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs) {
super(fields, size, filterNonCompetitiveDocs);
}

@Override
Expand All @@ -121,7 +121,7 @@ protected boolean lessThan(final Entry hitA, final Entry hitB) {
}

// prevent instantiation and extension.
private FieldValueHitQueue(SortField[] fields, int size) {
private FieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs) {
super(size);
// When we get here, fields.length is guaranteed to be > 0, therefore no
// need to check it again.
Expand All @@ -135,9 +135,15 @@ private FieldValueHitQueue(SortField[] fields, int size) {
reverseMul = new int[numComparators];
for (int i = 0; i < numComparators; ++i) {
SortField field = fields[i];

reverseMul[i] = field.reverse ? -1 : 1;
comparators[i] = field.getComparator(size, i);
if (i == 0 && field.getCanUsePoints() && filterNonCompetitiveDocs) {
// try to rewrite the 1st comparator to the comparator that can skip non-competitive documents
// skipping functionality is beneficial only for the 1st comparator
comparators[i] = FilteringFieldComparator.wrapToFilteringComparator(field.getComparator(size, i),
field.reverse, numComparators == 1);
} else {
comparators[i] = field.getComparator(size, i);
}
}
}

Expand All @@ -152,17 +158,20 @@ private FieldValueHitQueue(SortField[] fields, int size) {
* priority first); cannot be <code>null</code> or empty
* @param size
* The number of hits to retain. Must be greater than zero.
* @param filterNonCompetitiveDocs
* {@code true} If comparators should be allowed to filter non-competitive documents, {@code false} otherwise
*/
public static <T extends FieldValueHitQueue.Entry> FieldValueHitQueue<T> create(SortField[] fields, int size) {
public static <T extends FieldValueHitQueue.Entry> FieldValueHitQueue<T> create(SortField[] fields, int size,
boolean filterNonCompetitiveDocs) {

if (fields.length == 0) {
throw new IllegalArgumentException("Sort must contain at least one field");
}

if (fields.length == 1) {
return new OneComparatorFieldValueHitQueue<>(fields, size);
return new OneComparatorFieldValueHitQueue<>(fields, size, filterNonCompetitiveDocs);
} else {
return new MultiComparatorsFieldValueHitQueue<>(fields, size);
return new MultiComparatorsFieldValueHitQueue<>(fields, size, filterNonCompetitiveDocs);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;

import org.apache.lucene.index.LeafReaderContext;

import java.io.IOException;

/**
 * Base class for comparators that delegate to another {@code FieldComparator} while handing out
 * leaf comparators capable of filtering non-competitive docs.
 *
 * <p>Slot comparison, slot value lookup, value comparison and top-value handling are all forwarded
 * to the wrapped comparator; subclasses only supply the filtering leaf comparator.
 */
abstract class FilteringFieldComparator<T> extends FieldComparator<T> {
  /** The comparator every non-leaf operation is delegated to. */
  protected final FieldComparator<T> in;
  /** Whether the sort this comparator belongs to is reversed. */
  protected final boolean reverse;
  /**
   * {@code true} when the sort consists of a single sort field. With no further sort fields acting
   * as tie breakers, docs with equal values can be filtered out as well.
   */
  protected final boolean singleSort;
  /** Becomes {@code true} once {@link #setTopValue} has been called. */
  protected boolean hasTopValue;

  /**
   * Try to wrap a given field comparator to add to it a functionality to skip over non-competitive docs.
   * Only the exact classes {@code LongComparator}, {@code IntComparator}, {@code DoubleComparator}
   * and {@code FloatComparator} are wrapped; any other comparator is handed back unchanged.
   *
   * @param comparator comparator to wrap
   * @param reverse if this sort is reverse
   * @param singleSort true if this sort is based on a single field and there are no other sort fields for tie breaking
   * @return comparator wrapped as a filtering comparator or the original comparator if the filtering functionality
   *     is not implemented for it
   */
  public static FieldComparator<?> wrapToFilteringComparator(FieldComparator<?> comparator, boolean reverse, boolean singleSort) {
    final Class<?> type = comparator.getClass();
    final FieldComparator<?> result;
    if (type == FieldComparator.LongComparator.class) {
      result = new FilteringNumericComparator<>((FieldComparator.LongComparator) comparator, reverse, singleSort);
    } else if (type == FieldComparator.IntComparator.class) {
      result = new FilteringNumericComparator<>((FieldComparator.IntComparator) comparator, reverse, singleSort);
    } else if (type == FieldComparator.DoubleComparator.class) {
      result = new FilteringNumericComparator<>((FieldComparator.DoubleComparator) comparator, reverse, singleSort);
    } else if (type == FieldComparator.FloatComparator.class) {
      result = new FilteringNumericComparator<>((FieldComparator.FloatComparator) comparator, reverse, singleSort);
    } else {
      // No filtering variant exists for this comparator type: use it as is.
      result = comparator;
    }
    return result;
  }

  /**
   * Creates a filtering wrapper around {@code in}.
   *
   * @param in comparator to delegate to
   * @param reverse if this sort is reverse
   * @param singleSort true if the sort is based on a single field with no tie-breaking sort fields
   */
  public FilteringFieldComparator(FieldComparator<T> in, boolean reverse, boolean singleSort) {
    this.in = in;
    this.reverse = reverse;
    this.singleSort = singleSort;
  }

  /** Narrows the return type so callers always receive a filtering leaf comparator. */
  @Override
  public abstract FilteringLeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException;

  /** Forwards the top value to the wrapped comparator, then records that one has been set. */
  @Override
  public void setTopValue(T value) {
    in.setTopValue(value);
    hasTopValue = true;
  }

  /** Delegates slot comparison to the wrapped comparator. */
  @Override
  public int compare(int slot1, int slot2) {
    return in.compare(slot1, slot2);
  }

  /** Delegates value comparison to the wrapped comparator. */
  @Override
  public int compareValues(T first, T second) {
    return in.compareValues(first, second);
  }

  /** Delegates slot value lookup to the wrapped comparator. */
  @Override
  public T value(int slot) {
    return in.value(slot);
  }
}


Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;

import java.io.IOException;

/**
* A {@link LeafFieldComparator} that can additionally skip over non-competitive docs.
* On top of the inherited {@code LeafFieldComparator} methods, this interface declares two more:
* {@code competitiveIterator()} and {@code setCanUpdateIterator()}.
*/
public interface FilteringLeafFieldComparator extends LeafFieldComparator {
/**
* Returns a competitive iterator.
* Callers must be prepared for a {@code null} result.
* @return an iterator over competitive docs that are stronger than already collected docs
* or {@code null} if such an iterator is not available for the current segment.
* @throws IOException declared for implementations; the conditions are implementation-specific
*/
DocIdSetIterator competitiveIterator() throws IOException;

/**
* Informs this leaf comparator that it is allowed to start updating its competitive iterator.
* This method is called from a collector when queue becomes full and threshold is reached.
* @throws IOException declared for implementations; the conditions are implementation-specific
*/
void setCanUpdateIterator() throws IOException;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;

import org.apache.lucene.index.LeafReaderContext;

import java.io.IOException;

/**
 * A {@code FilteringFieldComparator} over a {@code NumericComparator}: for each segment it asks the
 * wrapped comparator for its leaf comparator and wraps that in the matching filtering numeric leaf
 * comparator, which can filter non-competitive docs.
 */
class FilteringNumericComparator<T extends Number> extends FilteringFieldComparator<T> {

  /**
   * @param in numeric comparator to delegate to
   * @param reverse if this sort is reverse
   * @param singleSort true if the sort is based on a single field with no tie-breaking sort fields
   */
  public FilteringNumericComparator(NumericComparator<T> in, boolean reverse, boolean singleSort) {
    super(in, reverse, singleSort);
  }

  /**
   * Obtains the leaf comparator of the wrapped comparator and wraps it in the filtering leaf
   * comparator for its numeric type. The type is matched by exact class, so only
   * {@code LongComparator}, {@code IntComparator}, {@code DoubleComparator} and
   * {@code FloatComparator} themselves are accepted.
   *
   * @param context the segment to create the leaf comparator for
   * @return a filtering leaf comparator around the wrapped comparator's leaf comparator
   * @throws IllegalStateException if the wrapped leaf comparator is none of the four supported classes
   */
  @Override
  public final FilteringLeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
    final LeafFieldComparator leaf = in.getLeafComparator(context);
    final Class<?> leafClass = leaf.getClass();

    if (leafClass == FieldComparator.LongComparator.class) {
      final FieldComparator.LongComparator longLeaf = (FieldComparator.LongComparator) leaf;
      return new FilteringNumericLeafComparator.FilteringLongLeafComparator(
          longLeaf, context, longLeaf.field, reverse, singleSort, hasTopValue);
    }
    if (leafClass == FieldComparator.IntComparator.class) {
      final FieldComparator.IntComparator intLeaf = (FieldComparator.IntComparator) leaf;
      return new FilteringNumericLeafComparator.FilteringIntLeafComparator(
          intLeaf, context, intLeaf.field, reverse, singleSort, hasTopValue);
    }
    if (leafClass == FieldComparator.DoubleComparator.class) {
      final FieldComparator.DoubleComparator doubleLeaf = (FieldComparator.DoubleComparator) leaf;
      return new FilteringNumericLeafComparator.FilteringDoubleLeafComparator(
          doubleLeaf, context, doubleLeaf.field, reverse, singleSort, hasTopValue);
    }
    if (leafClass == FieldComparator.FloatComparator.class) {
      final FieldComparator.FloatComparator floatLeaf = (FieldComparator.FloatComparator) leaf;
      return new FilteringNumericLeafComparator.FilteringFloatLeafComparator(
          floatLeaf, context, floatLeaf.field, reverse, singleSort, hasTopValue);
    }

    // None of the supported numeric comparators matched.
    throw new IllegalStateException("Unexpected numeric class of [" + leafClass + "] for [FieldComparator]!");
  }

}
Loading

0 comments on commit d06fb96

Please sign in to comment.