Skip to content

Commit

Permalink
remaining disjunction scorer full understand
Browse files Browse the repository at this point in the history
Signed-off-by: bowenlan-amzn <bowenlan23@gmail.com>
  • Loading branch information
bowenlan-amzn committed May 24, 2024
1 parent 4dd4ddc commit a3cb39d
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ private Collector pickCollector(LeafReaderContext ctx) throws IOException {
if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals) {
ValuesSource.Bytes.WithOrdinals source = (ValuesSource.Bytes.WithOrdinals) valuesSource;
final SortedSetDocValues ordinalValues = source.ordinalsValues(ctx);
final SortedSetDocValues globalOrdinalValues = source.globalOrdinalsValues(ctx);
final long maxOrd = ordinalValues.getValueCount();
if (maxOrd == 0) {
emptyCollectorsUsed++;
Expand Down Expand Up @@ -179,7 +180,7 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) {
if (counts == null || owningBucketOrdinal >= counts.maxOrd() || counts.cardinality(owningBucketOrdinal) == 0) {
return buildEmptyAggregation();
}
// We need to build a copy because the returned Aggregation needs remain usable after
// We need to build a copy because the returned Aggregation needs to remain usable after
// this Aggregator (and its HLL++ counters) is released.
AbstractHyperLogLogPlusPlus copy = counts.clone(owningBucketOrdinal, BigArrays.NON_RECYCLING_INSTANCE);
return new InternalCardinality(name, copy, metadata());
Expand Down Expand Up @@ -322,6 +323,9 @@ public void collect(int doc, long bucketOrd) throws IOException {
bits.set((int) ord);
}
}
// for this owning bucket ord, save the values of current doc as value ordinals bits
// visitedOrds (array with index as owning bucket, value as bits)
// ordinals is number array, each element representing a text or term, and sorted by the values
}

@Override
Expand All @@ -336,6 +340,7 @@ public void postCollect() throws IOException {

try (LongArray hashes = bigArrays.newLongArray(maxOrd, false)) {
final MurmurHash3.Hash128 hash = new MurmurHash3.Hash128();
// for every ordinal, we want the hash of its value
for (long ord = allVisitedOrds.nextSetBit(0); ord < Long.MAX_VALUE; ord = ord + 1 < maxOrd
? allVisitedOrds.nextSetBit(ord + 1)
: Long.MAX_VALUE) {
Expand All @@ -347,6 +352,7 @@ public void postCollect() throws IOException {
for (long bucket = visitedOrds.size() - 1; bucket >= 0; --bucket) {
final BitArray bits = visitedOrds.get(bucket);
if (bits != null) {
// for every ordinal of this bucket, we collect by using its hash
for (long ord = bits.nextSetBit(0); ord < Long.MAX_VALUE; ord = ord + 1 < maxOrd
? bits.nextSetBit(ord + 1)
: Long.MAX_VALUE) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
* rest of the search space, so this term's subscorer DISI can be safely removed from list of subscorer to process.
* <p>
* 2. {@link #removeAllDISIsOnCurrentDoc()} breaks the invariant of Conjuction DISI i.e. the docIDs of all sub-scorers should be
* less than or equal to current docID iterator is pointing to. When we remove elements from priority, it results in heapify action, which modifies
* less than or equal to current docID iterator is pointing to. When we remove elements from disi priority queue, it results in heapify action, which modifies
* the top of the priority queye, which represents the current docID for subscorers here. To address this, we are wrapping the
* iterator with {@link SlowDocIdPropagatorDISI} which keeps the iterator pointing to last docID before {@link #removeAllDISIsOnCurrentDoc()}
* is called and updates this docID only when next() or advance() is called.
Expand Down Expand Up @@ -97,7 +97,6 @@ public DocIdSetIterator iterator() {

private static class SlowDocIdPropagatorDISI extends DocIdSetIterator {
DocIdSetIterator disi;

Integer curDocId;

SlowDocIdPropagatorDISI(DocIdSetIterator disi, Integer curDocId) {
Expand Down Expand Up @@ -147,11 +146,6 @@ public TwoPhaseIterator twoPhaseIterator() {
return twoPhase;
}

@Override
public float getMaxScore(int i) throws IOException {
return 0;
}

private class TwoPhase extends TwoPhaseIterator {

private final float matchCost;
Expand Down Expand Up @@ -231,7 +225,6 @@ public float matchCost() {
}
}


@Override
public final int docID() {
return subScorers.top().doc;
Expand Down Expand Up @@ -262,4 +255,9 @@ public final Collection<ChildScorable> getChildren() throws IOException {
}
return children;
}

@Override
public float getMaxScore(int i) throws IOException {
return 0;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ public InternalAggregation reduce(List<InternalAggregation> aggregations, Reduce
return aggregations.get(0);
} else {
return new InternalCardinality(name, reduced, getMetadata());

}
}

Expand Down

0 comments on commit a3cb39d

Please sign in to comment.