diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/CardinalityAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/CardinalityAggregator.java index 91887e2e4a202..dd2e7458d81d2 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/CardinalityAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/CardinalityAggregator.java @@ -125,6 +125,7 @@ private Collector pickCollector(LeafReaderContext ctx) throws IOException { if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals) { ValuesSource.Bytes.WithOrdinals source = (ValuesSource.Bytes.WithOrdinals) valuesSource; final SortedSetDocValues ordinalValues = source.ordinalsValues(ctx); + final SortedSetDocValues globalOrdinalValues = source.globalOrdinalsValues(ctx); final long maxOrd = ordinalValues.getValueCount(); if (maxOrd == 0) { emptyCollectorsUsed++; @@ -179,7 +180,7 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) { if (counts == null || owningBucketOrdinal >= counts.maxOrd() || counts.cardinality(owningBucketOrdinal) == 0) { return buildEmptyAggregation(); } - // We need to build a copy because the returned Aggregation needs remain usable after + // We need to build a copy because the returned Aggregation needs to remain usable after // this Aggregator (and its HLL++ counters) is released. AbstractHyperLogLogPlusPlus copy = counts.clone(owningBucketOrdinal, BigArrays.NON_RECYCLING_INSTANCE); return new InternalCardinality(name, copy, metadata()); @@ -322,6 +323,9 @@ public void collect(int doc, long bucketOrd) throws IOException { bits.set((int) ord); } } + // for this owning bucket ord, save the values of current doc as value ordinals bits + // visitedOrds (array with index as owning bucket, value as bits) + // ordinals is number array, each element representing a text or term, and sorted by the values } @Override @@ -336,6 +340,7 @@ public void postCollect() throws IOException { try (LongArray hashes = bigArrays.newLongArray(maxOrd, false)) { final MurmurHash3.Hash128 hash = new MurmurHash3.Hash128(); + // for every ordinal, we want the hash of its value for (long ord = allVisitedOrds.nextSetBit(0); ord < Long.MAX_VALUE; ord = ord + 1 < maxOrd ? allVisitedOrds.nextSetBit(ord + 1) : Long.MAX_VALUE) { @@ -347,6 +352,7 @@ public void postCollect() throws IOException { for (long bucket = visitedOrds.size() - 1; bucket >= 0; --bucket) { final BitArray bits = visitedOrds.get(bucket); if (bits != null) { + // for every ordinal of this bucket, we collect by using its hash for (long ord = bits.nextSetBit(0); ord < Long.MAX_VALUE; ord = ord + 1 < maxOrd ? bits.nextSetBit(ord + 1) : Long.MAX_VALUE) { diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/DisjunctionWithDynamicPruningScorer.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/DisjunctionWithDynamicPruningScorer.java index 75fa3d2eb6f93..9b6a0f42e0fa9 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/DisjunctionWithDynamicPruningScorer.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/DisjunctionWithDynamicPruningScorer.java @@ -31,7 +31,7 @@ * rest of the search space, so this term's subscorer DISI can be safely removed from list of subscorer to process. *
* 2. {@link #removeAllDISIsOnCurrentDoc()} breaks the invariant of Conjuction DISI i.e. the docIDs of all sub-scorers should be
- * less than or equal to current docID iterator is pointing to. When we remove elements from priority, it results in heapify action, which modifies
+ * less than or equal to current docID iterator is pointing to. When we remove elements from disi priority queue, it results in heapify action, which modifies
* the top of the priority queye, which represents the current docID for subscorers here. To address this, we are wrapping the
* iterator with {@link SlowDocIdPropagatorDISI} which keeps the iterator pointing to last docID before {@link #removeAllDISIsOnCurrentDoc()}
* is called and updates this docID only when next() or advance() is called.
@@ -97,7 +97,6 @@ public DocIdSetIterator iterator() {
private static class SlowDocIdPropagatorDISI extends DocIdSetIterator {
DocIdSetIterator disi;
-
Integer curDocId;
SlowDocIdPropagatorDISI(DocIdSetIterator disi, Integer curDocId) {
@@ -147,11 +146,6 @@ public TwoPhaseIterator twoPhaseIterator() {
return twoPhase;
}
- @Override
- public float getMaxScore(int i) throws IOException {
- return 0;
- }
-
private class TwoPhase extends TwoPhaseIterator {
private final float matchCost;
@@ -231,7 +225,6 @@ public float matchCost() {
}
}
-
@Override
public final int docID() {
return subScorers.top().doc;
@@ -262,4 +255,9 @@ public final Collection