Skip to content

Commit

Permalink
report indexing time, force merge time in s not msec
Browse files Browse the repository at this point in the history
  • Loading branch information
mikemccand committed Sep 10, 2024
1 parent 291641c commit 23a7d42
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 14 deletions.
30 changes: 22 additions & 8 deletions src/main/knn/KnnGraphTester.java
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ public class KnnGraphTester {
private boolean reindex;
private boolean forceMerge;
private int reindexTimeMsec;
private double forceMergeTimeSec;
private int indexNumSegments;
private double indexSizeOnDiskMB;
private int beamWidth;
Expand Down Expand Up @@ -153,6 +154,7 @@ private KnnGraphTester() {
vectorEncoding = VectorEncoding.FLOAT32;
selectivity = 1f;
prefilter = false;
quantize = false;
randomCommits = false;
quantizeBits = 7;
quantizeCompress = false;
Expand Down Expand Up @@ -297,7 +299,7 @@ private void run(String... args) throws Exception {
similarityFunction = VectorSimilarityFunction.DOT_PRODUCT;
break;
default:
throw new IllegalArgumentException("-metric can be 'angular', 'euclidean', 'cosine', or 'mip' only");
throw new IllegalArgumentException("-metric can be 'mip', 'cosine', 'euclidean', 'angular' (or 'dot_product' -- same as 'angular') only; got: " + metric);
}
break;
case "-forceMerge":
Expand Down Expand Up @@ -363,10 +365,10 @@ private void run(String... args) throws Exception {
0,
quiet
).createIndex();
System.out.println("reindex takes " + reindexTimeMsec + " ms");
// Report indexing time in seconds; Locale.ROOT keeps the decimal separator stable across machines.
System.out.println(String.format(Locale.ROOT, "reindex takes %.2f sec", msToSec(reindexTimeMsec)));
}
if (forceMerge) {
forceMerge();
forceMergeTimeSec = forceMerge();
}
try (Directory dir = FSDirectory.open(indexPath); IndexReader reader = DirectoryReader.open(dir)) {
indexNumSegments = reader.leaves().size();
Expand Down Expand Up @@ -427,19 +429,22 @@ private void printFanoutHist(Path indexPath) throws IOException {
}

/**
 * Force-merges the index at {@code indexPath} down to a single segment and prints how long that
 * took.
 *
 * <p>The writer is opened in APPEND mode using the codec returned by {@code getCodec(...)} for the
 * current maxConn / beamWidth / merge-worker / quantization settings. Unless {@code quiet} is set,
 * IndexWriter's info stream is sent to stdout.
 *
 * @return elapsed time of the merge in seconds (for the {@code double}-returning signature)
 * @throws IOException propagated from opening the directory or from the IndexWriter calls
 */
// NOTE(review): this span looks like a rendered diff hunk -- the void signature and the
// currentTimeMillis-based timing lines appear to be the pre-change versions; confirm against the repo.
@SuppressForbidden(reason = "Prints stuff")
private void forceMerge() throws IOException {
private double forceMerge() throws IOException {
IndexWriterConfig iwc = new IndexWriterConfig().setOpenMode(IndexWriterConfig.OpenMode.APPEND);
iwc.setCodec(getCodec(maxConn, beamWidth, exec, numMergeWorker, quantize, quantizeBits, quantizeCompress));
if (quiet == false) {
// not a quiet place!
iwc.setInfoStream(new PrintStreamInfoStream(System.out));
}
System.out.println("Force merge index in " + indexPath);
long start = System.currentTimeMillis();
// nanoTime is the JDK's elapsed-time clock; the timer starts before the writer is opened
long startNS = System.nanoTime();
try (IndexWriter iw = new IndexWriter(FSDirectory.open(indexPath), iwc)) {
// merge everything down to one segment
iw.forceMerge(1);
}
System.out.println("Force merge done in: " + (System.currentTimeMillis() - start) + " ms");
// the end timestamp is taken after the try-with-resources has closed the writer
long endNS = System.nanoTime();
double elapsedSec = nsToSec(endNS - startNS);
System.out.println(String.format(Locale.ROOT, "Force merge done in %.2f sec", elapsedSec));
return elapsedSec;
}

@SuppressForbidden(reason = "Prints stuff")
Expand Down Expand Up @@ -660,7 +665,7 @@ private void testSearch(Path indexPath, Path queryPath, Path outputPath, int[][]
}
System.out.printf(
Locale.ROOT,
"SUMMARY: %5.3f\t%5.3f\t%d\t%d\t%d\t%d\t%d\t%s\t%d\t%d\t%d\t%.2f\t%.2f\t%s\n",
"SUMMARY: %5.3f\t%5.3f\t%d\t%d\t%d\t%d\t%d\t%s\t%d\t%.2f\t%.2f\t%d\t%.2f\t%.2f\t%s\n",
recall,
totalCpuTimeMS / (float) numIters,
numDocs,
Expand All @@ -670,14 +675,23 @@ private void testSearch(Path indexPath, Path queryPath, Path outputPath, int[][]
beamWidth,
quantizeDesc,
totalVisited,
reindexTimeMsec,
reindexTimeMsec / 1000.0,
forceMergeTimeSec,
indexNumSegments,
indexSizeOnDiskMB,
selectivity,
prefilter ? "pre-filter" : "post-filter");
}
}

/**
 * Converts a duration in nanoseconds to fractional seconds.
 *
 * @param ns duration in nanoseconds
 * @return the same duration expressed in seconds
 */
private static double nsToSec(long ns) {
  final double nanosPerSecond = 1_000_000_000d;
  return ns / nanosPerSecond;
}

/**
 * Converts a duration in milliseconds to fractional seconds.
 *
 * @param ms duration in milliseconds
 * @return the same duration expressed in seconds
 */
private static double msToSec(long ms) {
  // divide the parameter itself; the previous body referenced an undefined variable "ns"
  return ms / (double) 1_000;
}

private static TopDocs doKnnByteVectorQuery(
IndexSearcher searcher, String field, byte[] vector, int k, int fanout, Query filter)
throws IOException {
Expand Down
24 changes: 18 additions & 6 deletions src/python/knnPerfTest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
#!/usr/bin/env python

# TODO
# - why force merge 12X slower
# - why only one thread
# - try turning off diversity
# - report forceMerge time in the final table
# - report net concurrency utilized in the table
# - add -stats option here that just runs KnnGraphTest 2nd time with -stats and includes/summarizes output or so

import subprocess
import sys
import benchUtil
Expand Down Expand Up @@ -38,16 +46,16 @@
#'ndoc': (10000, 100000, 1000000),
#'ndoc': (10000, 100000, 200000, 500000),
#'ndoc': (10000, 100000, 200000, 500000),
'ndoc': (250_000,),
'ndoc': (1_500_000,),
#'ndoc': (100000,),
#'maxConn': (32, 64, 96),
#'maxConn': (64, ),
'maxConn': (8, 16, 32),
'maxConn': (8, 16, 32, 50),
#'beamWidthIndex': (250, 500),
#'beamWidthIndex': (250, ),
'beamWidthIndex': (50, ),
'beamWidthIndex': (50,),
#'fanout': (20, 100, 250)
'fanout': (0,),
'fanout': (6,),
#'quantize': None,
'quantizeBits': (32,),
'numMergeWorker': (12,),
Expand Down Expand Up @@ -131,7 +139,8 @@ def run_knn_benchmark(checkout, values):
'-search', query_vectors,
#'-metric', 'euclidean',
# '-numMergeThread', '8', '-numMergeWorker', '8',
#'-forceMerge',
'-forceMerge',
#'-stats',
'-quiet']
print(f' cmd: {this_cmd}')
job = subprocess.Popen(this_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, encoding='utf-8')
Expand All @@ -153,7 +162,7 @@ def run_knn_benchmark(checkout, values):
all_results.append(summary)
print('\nResults:')

header = 'recall\tlatency (ms)\tnDoc\ttopK\tfanout\tmaxConn\tbeamWidth\tquantized\tvisited\tindex ms\tnum segments\tindex size (MB)\tselectivity\tfilterType'
header = 'recall\tlatency (ms)\tnDoc\ttopK\tfanout\tmaxConn\tbeamWidth\tquantized\tvisited\tindex s\tforce merge s\tnum segments\tindex size (MB)\tselectivity\tfilterType'

# crazy logic to make everything fixed width so rendering in fixed width font "aligns":
headers = header.split('\t')
Expand All @@ -167,6 +176,9 @@ def run_knn_benchmark(checkout, values):
# turn filterType/selectivity back on for all runs
skip_headers = {'selectivity', 'filterType', 'visited'}

if '-forceMerge' not in this_cmd:
skip_headers.add('force merge s')

skip_column_index = {headers.index(h) for h in skip_headers}

for row in rows_to_print:
Expand Down

0 comments on commit 23a7d42

Please sign in to comment.