Skip to content

Commit

Permalink
[ML] Impose a minimum on the automatically calculated JVM size (#91732)…
Browse files Browse the repository at this point in the history
… (#91742)

This change fixes a discrepancy that has existed for a long time
but was revealed by #91694. The ML automatic node/JVM sizing code
contained a minimum node size but did not restrict the minimum
JVM size to the size that would be chosen on that minimum node
size. This could throw off calculations at small scale.

Fixes #91728
  • Loading branch information
droberts195 authored Nov 21, 2022
1 parent 2ea0662 commit e9f3303
Showing 1 changed file with 6 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ public final class NativeMemoryCalculator {
// Must match the value used in MachineDependentHeap.MachineNodeRole.ML_ONLY.
public static final long JVM_SIZE_KNOT_POINT = ByteSizeValue.ofGb(16).getBytes();
private static final long BYTES_IN_4MB = ByteSizeValue.ofMb(4).getBytes();
// The minimum automatic node size implicitly defines a minimum JVM size
private static final long MINIMUM_AUTOMATIC_JVM_SIZE = dynamicallyCalculateJvmSizeFromNodeSize(MINIMUM_AUTOMATIC_NODE_SIZE);

private NativeMemoryCalculator() {}

Expand Down Expand Up @@ -165,7 +167,7 @@ public static long dynamicallyCalculateJvmSizeFromNodeSize(long nodeSize) {
);
}

public static long dynamicallyCalculateJvmSizeFromMlNativeMemorySize(long nativeMachineMemory) {
public static long dynamicallyCalculateJvmSizeFromMlNativeMemorySize(long mlNativeMemorySize) {
// For <= 16GB node, the JVM is 0.4 * total_node_size. This means the rest is 0.6 the node size.
// So, nativeAndOverhead = 0.6 * total_node_size => total_node_size = (nativeAndOverhead / 0.6)
// Consequently jvmSize = (nativeAndOverhead / 0.6) * 0.4 = nativeAndOverhead * 2 / 3
Expand All @@ -177,7 +179,7 @@ public static long dynamicallyCalculateJvmSizeFromMlNativeMemorySize(long native
//
// In both cases JVM size is rounded down to the next lower multiple of 4 megabytes to match
// MachineDependentHeap.MachineNodeRole.ML_ONLY.
long nativeAndOverhead = nativeMachineMemory + OS_OVERHEAD;
long nativeAndOverhead = mlNativeMemorySize + OS_OVERHEAD;
long higherAnswer;
if (nativeAndOverhead <= (JVM_SIZE_KNOT_POINT - dynamicallyCalculateJvmSizeFromNodeSize(JVM_SIZE_KNOT_POINT))) {
higherAnswer = (nativeAndOverhead * 2 / 3 / BYTES_IN_4MB) * BYTES_IN_4MB;
Expand All @@ -196,10 +198,10 @@ public static long dynamicallyCalculateJvmSizeFromMlNativeMemorySize(long native
long lowerAnswer = higherAnswer - BYTES_IN_4MB;
long nodeSizeImpliedByLowerAnswer = nativeAndOverhead + lowerAnswer;
if (dynamicallyCalculateJvmSizeFromNodeSize(nodeSizeImpliedByLowerAnswer) == lowerAnswer) {
return Math.min(lowerAnswer, STATIC_JVM_UPPER_THRESHOLD);
return Math.max(MINIMUM_AUTOMATIC_JVM_SIZE, Math.min(lowerAnswer, STATIC_JVM_UPPER_THRESHOLD));
}
}
return Math.min(higherAnswer, STATIC_JVM_UPPER_THRESHOLD);
return Math.max(MINIMUM_AUTOMATIC_JVM_SIZE, Math.min(higherAnswer, STATIC_JVM_UPPER_THRESHOLD));
}

/**
Expand Down

0 comments on commit e9f3303

Please sign in to comment.