diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/CardinalityAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/CardinalityAggregator.java index 91887e2e4a202..dd2e7458d81d2 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/CardinalityAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/CardinalityAggregator.java @@ -125,6 +125,7 @@ private Collector pickCollector(LeafReaderContext ctx) throws IOException { if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals) { ValuesSource.Bytes.WithOrdinals source = (ValuesSource.Bytes.WithOrdinals) valuesSource; final SortedSetDocValues ordinalValues = source.ordinalsValues(ctx); + final SortedSetDocValues globalOrdinalValues = source.globalOrdinalsValues(ctx); final long maxOrd = ordinalValues.getValueCount(); if (maxOrd == 0) { emptyCollectorsUsed++; @@ -179,7 +180,7 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) { if (counts == null || owningBucketOrdinal >= counts.maxOrd() || counts.cardinality(owningBucketOrdinal) == 0) { return buildEmptyAggregation(); } - // We need to build a copy because the returned Aggregation needs remain usable after + // We need to build a copy because the returned Aggregation needs to remain usable after // this Aggregator (and its HLL++ counters) is released. AbstractHyperLogLogPlusPlus copy = counts.clone(owningBucketOrdinal, BigArrays.NON_RECYCLING_INSTANCE); return new InternalCardinality(name, copy, metadata()); @@ -322,6 +323,9 @@ public void collect(int doc, long bucketOrd) throws IOException { bits.set((int) ord); } } + // for this owning bucket ord, save the values of current doc as value ordinals bits + // visitedOrds (array with index as owning bucket, value as bits) + // ordinals is number array, each element representing a text or term, and sorted by the values } @Override @@ -336,6 +340,7 @@ public void postCollect() throws IOException { try (LongArray hashes = bigArrays.newLongArray(maxOrd, false)) { final MurmurHash3.Hash128 hash = new MurmurHash3.Hash128(); + // for every ordinal, we want the hash of its value for (long ord = allVisitedOrds.nextSetBit(0); ord < Long.MAX_VALUE; ord = ord + 1 < maxOrd ? allVisitedOrds.nextSetBit(ord + 1) : Long.MAX_VALUE) { @@ -347,6 +352,7 @@ public void postCollect() throws IOException { for (long bucket = visitedOrds.size() - 1; bucket >= 0; --bucket) { final BitArray bits = visitedOrds.get(bucket); if (bits != null) { + // for every ordinal of this bucket, we collect by using its hash for (long ord = bits.nextSetBit(0); ord < Long.MAX_VALUE; ord = ord + 1 < maxOrd ? bits.nextSetBit(ord + 1) : Long.MAX_VALUE) { diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/DisjunctionWithDynamicPruningScorer.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/DisjunctionWithDynamicPruningScorer.java index 75fa3d2eb6f93..9b6a0f42e0fa9 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/DisjunctionWithDynamicPruningScorer.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/DisjunctionWithDynamicPruningScorer.java @@ -31,7 +31,7 @@ * rest of the search space, so this term's subscorer DISI can be safely removed from list of subscorer to process. *

* 2. {@link #removeAllDISIsOnCurrentDoc()} breaks the invariant of Conjuction DISI i.e. the docIDs of all sub-scorers should be - * less than or equal to current docID iterator is pointing to. When we remove elements from priority, it results in heapify action, which modifies + * less than or equal to current docID iterator is pointing to. When we remove elements from disi priority queue, it results in heapify action, which modifies * the top of the priority queye, which represents the current docID for subscorers here. To address this, we are wrapping the * iterator with {@link SlowDocIdPropagatorDISI} which keeps the iterator pointing to last docID before {@link #removeAllDISIsOnCurrentDoc()} * is called and updates this docID only when next() or advance() is called. @@ -97,7 +97,6 @@ public DocIdSetIterator iterator() { private static class SlowDocIdPropagatorDISI extends DocIdSetIterator { DocIdSetIterator disi; - Integer curDocId; SlowDocIdPropagatorDISI(DocIdSetIterator disi, Integer curDocId) { @@ -147,11 +146,6 @@ public TwoPhaseIterator twoPhaseIterator() { return twoPhase; } - @Override - public float getMaxScore(int i) throws IOException { - return 0; - } - private class TwoPhase extends TwoPhaseIterator { private final float matchCost; @@ -231,7 +225,6 @@ public float matchCost() { } } - @Override public final int docID() { return subScorers.top().doc; @@ -262,4 +255,9 @@ public final Collection getChildren() throws IOException { } return children; } + + @Override + public float getMaxScore(int i) throws IOException { + return 0; + } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/InternalCardinality.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/InternalCardinality.java index 7e9511ffdd379..9f9ad63220fea 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/InternalCardinality.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/InternalCardinality.java @@ -117,7 +117,6 @@ public InternalAggregation reduce(List aggregations, Reduce return aggregations.get(0); } else { return new InternalCardinality(name, reduced, getMetadata()); - } }