Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed merge logic for multiple shards case #877

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Features
### Enhancements
### Bug Fixes
- Fixed merge logic in hybrid query for multiple shards case ([#877](https://github.com/opensearch-project/neural-search/pull/877))
### Infrastructure
- Update batch related tests to use batch_size in processor & refactor BWC version check ([#852](https://github.com/opensearch-project/neural-search/pull/852))
### Documentation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,15 @@ class TopDocsMerger {
* @return merged TopDocsAndMaxScore object
*/
public TopDocsAndMaxScore merge(final TopDocsAndMaxScore source, final TopDocsAndMaxScore newTopDocs) {
if (Objects.isNull(newTopDocs) || Objects.isNull(newTopDocs.topDocs) || newTopDocs.topDocs.totalHits.value == 0) {
// we need to check if any of source and destination top docs are empty. This is needed for case when concurrent segment search
// is enabled. In such case search is done by multiple workers, and results are saved in multiple doc collectors. Any on those
// results can be empty, in such case we can skip actual merge logic and just return result object.
if (isEmpty(newTopDocs)) {
return source;
}
if (isEmpty(source)) {
return newTopDocs;
}
martin-gaievski marked this conversation as resolved.
Show resolved Hide resolved
TotalHits mergedTotalHits = getMergedTotalHits(source, newTopDocs);
TopDocsAndMaxScore result = new TopDocsAndMaxScore(
getTopDocs(getMergedScoreDocs(source.topDocs.scoreDocs, newTopDocs.topDocs.scoreDocs), mergedTotalHits),
Expand All @@ -66,6 +72,20 @@ public TopDocsAndMaxScore merge(final TopDocsAndMaxScore source, final TopDocsAn
return result;
}

/**
* Checks if TopDocsAndMaxScore is null, has no top docs or zero total hits
* @param topDocsAndMaxScore
* @return
*/
private static boolean isEmpty(final TopDocsAndMaxScore topDocsAndMaxScore) {
if (Objects.isNull(topDocsAndMaxScore)
|| Objects.isNull(topDocsAndMaxScore.topDocs)
|| topDocsAndMaxScore.topDocs.totalHits.value == 0) {
return true;
}
return false;
}

private TotalHits getMergedTotalHits(final TopDocsAndMaxScore source, final TopDocsAndMaxScore newTopDocs) {
// merged value is a lower bound - if both are equal_to than merged will also be equal_to,
// otherwise assign greater_than_or_equal
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,65 @@ public void testMergeScoreDocs_whenBothTopDocsHasNoHits_thenSuccessful() {
assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[3].score, 0);
}

@SneakyThrows
public void testMergeScoreDocs_whenSomeSegmentsHasNoHits_thenSuccessful() {
// Given
TopDocsMerger topDocsMerger = new TopDocsMerger(null);

// When
// first segment has no results, and we merge with non-empty segment
TopDocs topDocsOriginal = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
TopDocsAndMaxScore topDocsAndMaxScoreOriginal = new TopDocsAndMaxScore(topDocsOriginal, 0);
TopDocs topDocsNew = new TopDocs(
new TotalHits(2, TotalHits.Relation.EQUAL_TO),
new ScoreDoc[] {
createStartStopElementForHybridSearchResults(0),
createDelimiterElementForHybridSearchResults(0),
new ScoreDoc(0, 0.5f),
new ScoreDoc(2, 0.3f),
createStartStopElementForHybridSearchResults(0) }
);
TopDocsAndMaxScore topDocsAndMaxScoreNew = new TopDocsAndMaxScore(topDocsNew, 0.5f);
TopDocsAndMaxScore mergedTopDocsAndMaxScore = topDocsMerger.merge(topDocsAndMaxScoreOriginal, topDocsAndMaxScoreNew);

// Then
assertNotNull(mergedTopDocsAndMaxScore);

assertEquals(0.5f, mergedTopDocsAndMaxScore.maxScore, DELTA_FOR_ASSERTION);
assertEquals(2, mergedTopDocsAndMaxScore.topDocs.totalHits.value);
assertEquals(TotalHits.Relation.EQUAL_TO, mergedTopDocsAndMaxScore.topDocs.totalHits.relation);
assertEquals(5, mergedTopDocsAndMaxScore.topDocs.scoreDocs.length);
// check format, all elements one by one
ScoreDoc[] scoreDocs = mergedTopDocsAndMaxScore.topDocs.scoreDocs;
assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[0].score, 0);
assertEquals(MAGIC_NUMBER_DELIMITER, scoreDocs[1].score, 0);
assertScoreDoc(scoreDocs[2], 0, 0.5f);
assertScoreDoc(scoreDocs[3], 2, 0.3f);
assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[4].score, 0);

// When
// source object has results, and we merge with empty segment
TopDocs topDocsNewEmpty = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
TopDocsAndMaxScore topDocsAndMaxScoreNewEmpty = new TopDocsAndMaxScore(topDocsNewEmpty, 0);
TopDocsAndMaxScore finalMergedTopDocsAndMaxScore = topDocsMerger.merge(mergedTopDocsAndMaxScore, topDocsAndMaxScoreNewEmpty);

// Then
// merged object remains unchanged
assertNotNull(finalMergedTopDocsAndMaxScore);

assertEquals(0.5f, finalMergedTopDocsAndMaxScore.maxScore, DELTA_FOR_ASSERTION);
assertEquals(2, finalMergedTopDocsAndMaxScore.topDocs.totalHits.value);
assertEquals(TotalHits.Relation.EQUAL_TO, finalMergedTopDocsAndMaxScore.topDocs.totalHits.relation);
assertEquals(5, finalMergedTopDocsAndMaxScore.topDocs.scoreDocs.length);
// check format, all elements one by one
ScoreDoc[] finalScoreDocs = finalMergedTopDocsAndMaxScore.topDocs.scoreDocs;
assertEquals(MAGIC_NUMBER_START_STOP, finalScoreDocs[0].score, 0);
assertEquals(MAGIC_NUMBER_DELIMITER, finalScoreDocs[1].score, 0);
assertScoreDoc(finalScoreDocs[2], 0, 0.5f);
assertScoreDoc(finalScoreDocs[3], 2, 0.3f);
assertEquals(MAGIC_NUMBER_START_STOP, finalScoreDocs[4].score, 0);
}

@SneakyThrows
public void testThreeSequentialMerges_whenAllTopDocsHasHits_thenSuccessful() {
TopDocsMerger topDocsMerger = new TopDocsMerger(null);
Expand Down
Loading