Skip to content

Commit

Permalink
[JENKINS-14789] Configurable interval for computer retention check (#…
Browse files Browse the repository at this point in the history
…7037)

* [JENKINS-14789] Configurable interval for computer retention check

Also update the lower bound delay as returned by each RetentionStrategy
 from 1min to 0min. This is enabling check intervals of <1min to trigger
 a re-check in the next cycle (previously, the nextCheck tracking would
 defer the next re-check into the 2nd cycle -- one for the timer loop
 and once for the nextCheck timestamp).

Includes a bug fix:
`CloudSlaveRetentionStrategy` tracks the agent timeout in milliseconds
 internally and returned the nextCheck delay as a tenth of that.
The nextCheck delay is consumed in minutes, leading to the max delay
 of one hour being used.
Fix: Convert the delay from milliseconds to minutes.

Signed-off-by: Jakob Ackermann <das7pad@outlook.com>

* [JENKINS-14789] Allow a check interval of one second

Signed-off-by: Jakob Ackermann <das7pad@outlook.com>

* [JENKINS-14789] Apply review feedback

- Double down on JavaDoc
- Migrate UI to jelly

Co-Authored-By: Alexander Brandes <mc.cache@web.de>
Signed-off-by: Jakob Ackermann <das7pad@outlook.com>

* [JENKINS-14789] Store computerRetentionCheckInterval in model instance

Signed-off-by: Jakob Ackermann <das7pad@outlook.com>

* [JENKINS-14789] Add missing javadoc comment

Signed-off-by: Jakob Ackermann <das7pad@outlook.com>

* [JENKINS-14789] Add tests for model

Signed-off-by: Jakob Ackermann <das7pad@outlook.com>

* [JENKINS-14789] Use AperiodicWork for ComputerRetentionWork

Signed-off-by: Jakob Ackermann <das7pad@outlook.com>

* [JENKINS-14789] limit computer retention check interval to 60s

Signed-off-by: Jakob Ackermann <das7pad@outlook.com>

* [JENKINS-14789] apply review feedback

Co-authored-by: Tim Jacomb <21194782+timja@users.noreply.github.com>
Signed-off-by: Jakob Ackermann <das7pad@outlook.com>

---------

Signed-off-by: Jakob Ackermann <das7pad@outlook.com>
Co-authored-by: Alexander Brandes <mc.cache@web.de>
Co-authored-by: Tim Jacomb <21194782+timja@users.noreply.github.com>
  • Loading branch information
3 people committed Jun 18, 2024
1 parent 0487432 commit 16b5d21
Show file tree
Hide file tree
Showing 9 changed files with 260 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public long check(final AbstractCloudComputer c) {
}
}
}
return 1;
return 0;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public long check(T c) {
}
}
}
return checkCycle();
return TimeUnit.MILLISECONDS.toMinutes(checkCycle());
}

/**
Expand Down
19 changes: 13 additions & 6 deletions core/src/main/java/hudson/slaves/ComputerRetentionWork.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@
package hudson.slaves;

import hudson.Extension;
import hudson.ExtensionList;
import hudson.model.AperiodicWork;
import hudson.model.Computer;
import hudson.model.Node;
import hudson.model.PeriodicWork;
import hudson.model.Queue;
import java.util.Map;
import java.util.WeakHashMap;
import java.util.concurrent.TimeUnit;
import jenkins.model.GlobalComputerRetentionCheckIntervalConfiguration;
import jenkins.model.Jenkins;
import org.jenkinsci.Symbol;

Expand All @@ -42,7 +44,7 @@
* @author Stephen Connolly
*/
@Extension @Symbol("computerRetention")
public class ComputerRetentionWork extends PeriodicWork {
public class ComputerRetentionWork extends AperiodicWork {

/**
* Use weak hash map to avoid leaking {@link Computer}.
Expand All @@ -51,12 +53,18 @@ public class ComputerRetentionWork extends PeriodicWork {

@Override
public long getRecurrencePeriod() {
return MIN;
return ExtensionList.lookupSingleton(GlobalComputerRetentionCheckIntervalConfiguration.class).getComputerRetentionCheckInterval() * 1000L;
}

@Override
public AperiodicWork getNewInstance() {
// ComputerRetentionWork is a singleton.
return this;
}

@SuppressWarnings("unchecked")
@Override
protected void doRun() {
protected void doAperiodicRun() {
final long startRun = System.currentTimeMillis();
for (final Computer c : Jenkins.get().getComputers()) {
Queue.withLock(new Runnable() {
Expand All @@ -67,8 +75,7 @@ public void run() {
return;
if (!nextCheck.containsKey(c) || startRun > nextCheck.get(c)) {
// at the moment I don't trust strategies to wait more than 60 minutes
// strategies need to wait at least one minute
final long waitInMins = Math.max(1, Math.min(60, c.getRetentionStrategy().check(c)));
final long waitInMins = Math.max(0, Math.min(60, c.getRetentionStrategy().check(c)));
nextCheck.put(c, startRun + TimeUnit.MINUTES.toMillis(waitInMins));
}
}
Expand Down
4 changes: 2 additions & 2 deletions core/src/main/java/hudson/slaves/RetentionStrategy.java
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ public Always() {
public long check(SlaveComputer c) {
if (c.isOffline() && !c.isConnecting() && c.isLaunchSupported())
c.tryReconnect();
return 1;
return 0;
}

@Extension(ordinal = 100) @Symbol("always")
Expand Down Expand Up @@ -285,7 +285,7 @@ public long check(final SlaveComputer c) {
return TimeUnit.MILLISECONDS.toMinutes(TimeUnit.MINUTES.toMillis(idleDelay) - idleMilliseconds);
}
}
return 1;
return 0;
}

@Extension @Symbol("demand")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ public void run() {
LOGGER.log(INFO,
"Disabling new jobs for computer {0} as it has finished its scheduled uptime",
new Object[]{c.getName()});
return 1;
return 0;
} else if (c.isIdle() && c.isAcceptingTasks()) {
Queue.withLock(new Runnable() {
@Override
Expand Down Expand Up @@ -243,7 +243,7 @@ public void run() {
}
}
}
return 1;
return 0;
}

private synchronized boolean isOnlineScheduled() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package jenkins.model;

import hudson.Extension;
import hudson.model.PersistentDescriptor;
import java.util.logging.Logger;
import net.sf.json.JSONException;
import net.sf.json.JSONObject;
import org.jenkinsci.Symbol;
import org.kohsuke.stapler.StaplerRequest;

/**
* Configures check interval for computer retention.
*
* @author Jakob Ackermann
*/
@Extension(ordinal = 401) @Symbol("computerRetentionCheckInterval")
public class GlobalComputerRetentionCheckIntervalConfiguration extends GlobalConfiguration implements PersistentDescriptor {
/**
* For {@link hudson.slaves.ComputerRetentionWork#getRecurrencePeriod()}
*/
private int computerRetentionCheckInterval = 60;

/**
* Gets the check interval for computer retention.
*
* @since TODO
*/
public int getComputerRetentionCheckInterval() {
if (computerRetentionCheckInterval <= 0) {
LOGGER.info("computerRetentionCheckInterval must be greater than zero, falling back to 60s");
return 60;
}
if (computerRetentionCheckInterval > 60) {
LOGGER.info("computerRetentionCheckInterval is limited to 60s");
return 60;
}
return computerRetentionCheckInterval;
}

/**
* Updates the check interval for computer retention and restarts the check cycle.
*
* @param interval new check interval in seconds
* @throws IllegalArgumentException interval must be greater than zero
* @since TODO
*/
private void setComputerRetentionCheckInterval(int interval) throws IllegalArgumentException {
if (interval <= 0) {
throw new IllegalArgumentException("interval must be greater than zero");
}
if (interval > 60) {
throw new IllegalArgumentException("interval must be below or equal 60s");
}
computerRetentionCheckInterval = interval;
save();
}

@Override
public boolean configure(StaplerRequest req, JSONObject json) throws FormException {
try {
final int interval = json.getInt("computerRetentionCheckInterval");
setComputerRetentionCheckInterval(interval);
return true;
} catch (IllegalArgumentException e) {
throw new FormException(e, "computerRetentionCheckInterval");
} catch (JSONException e) {
throw new FormException(e.getMessage(), "computerRetentionCheckInterval");
}
}

private static final Logger LOGGER = Logger.getLogger(GlobalComputerRetentionCheckIntervalConfiguration.class.getName());
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<?jelly escape-by-default='true'?>
<j:jelly xmlns:j="jelly:core" xmlns:f="/lib/form">
<f:entry title="${%Computer Retention Check Interval}" help="/help/system-config/computerRetentionCheckInterval.html">
<f:number
clazz="required number" min="1" max="60"
field="computerRetentionCheckInterval"
checkMessage="${%The Computer Retention Check Interval is mandatory and must be a number greater than zero.}" />
</f:entry>
</j:jelly>
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
package jenkins.model;

import static org.junit.Assert.assertEquals;

import hudson.model.Descriptor;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.logging.Level;
import net.sf.json.JSONObject;
import org.junit.Rule;
import org.junit.Test;
import org.jvnet.hudson.test.JenkinsRule;
import org.jvnet.hudson.test.LoggerRule;
import org.kohsuke.stapler.Stapler;


/**
* Ensure interval bounds are enforced when re-configuring and loading from disk. Also ensure default value handling.
*
* @author Jakob Ackermann
*/
public class GlobalComputerRetentionCheckIntervalConfigurationTest {
@Rule
public JenkinsRule j = new JenkinsRule();
@Rule
public LoggerRule logging = new LoggerRule();

private File getConfig(GlobalComputerRetentionCheckIntervalConfiguration c) {
return new File(j.jenkins.getRootDir(), c.getId() + ".xml");
}

private void recordWarnings() {
logging.record(GlobalComputerRetentionCheckIntervalConfiguration.class, Level.INFO).capture(100);
}

@Test
public void bootWithMissingCfg() {
recordWarnings();
GlobalComputerRetentionCheckIntervalConfiguration c = new GlobalComputerRetentionCheckIntervalConfiguration();
c.load();
assertEquals("default", 60, c.getComputerRetentionCheckInterval());
assertEquals("no fallback message", logging.getRecords().size(), 0);
}

private void writeConfig(GlobalComputerRetentionCheckIntervalConfiguration c, int interval) throws IOException {
String bad = "" +
"<?xml version='1.1' encoding='UTF-8'?>\n" +
"<jenkins.model.GlobalComputerRetentionCheckIntervalConfiguration>\n" +
" <computerRetentionCheckInterval>" + interval + "</computerRetentionCheckInterval>\n" +
"</jenkins.model.GlobalComputerRetentionCheckIntervalConfiguration>";
Files.writeString(getConfig(c).toPath(), bad, StandardCharsets.UTF_8);
}

private void checkUsesFallbackAfterLoadOf(int interval) throws IOException {
recordWarnings();
GlobalComputerRetentionCheckIntervalConfiguration c = new GlobalComputerRetentionCheckIntervalConfiguration();
writeConfig(c, interval);
c.load();
assertEquals("uses default", 60, c.getComputerRetentionCheckInterval());
assertEquals("prints one fallback message", 1, logging.getRecords().size());
assertEquals("fallback message content", "computerRetentionCheckInterval must be greater than zero, falling back to 60s", logging.getRecords().get(0).getMessage());
}

@Test
public void bootWithNegative() throws IOException {
checkUsesFallbackAfterLoadOf(-1);
}

@Test
public void bootWithZero() throws IOException {
checkUsesFallbackAfterLoadOf(0);
}

@Test
public void bootWithPositive() throws IOException {
recordWarnings();
GlobalComputerRetentionCheckIntervalConfiguration c = new GlobalComputerRetentionCheckIntervalConfiguration();
writeConfig(c, 1);
c.load();
assertEquals("uses custom value", 1, c.getComputerRetentionCheckInterval());
assertEquals("no fallback message", 0, logging.getRecords().size());
}

@Test
public void bootWithTooLargeValue() throws IOException {
recordWarnings();
GlobalComputerRetentionCheckIntervalConfiguration c = new GlobalComputerRetentionCheckIntervalConfiguration();
writeConfig(c, 1337);
c.load();
assertEquals("uses default", 60, c.getComputerRetentionCheckInterval());
assertEquals("prints one fallback message", 1, logging.getRecords().size());
assertEquals("fallback message content", "computerRetentionCheckInterval is limited to 60s", logging.getRecords().get(0).getMessage());
}

@Test
public void saveCycle() {
recordWarnings();
GlobalComputerRetentionCheckIntervalConfiguration c = new GlobalComputerRetentionCheckIntervalConfiguration();

JSONObject json = new JSONObject();
json.element("computerRetentionCheckInterval", 5);
try {
c.configure(Stapler.getCurrentRequest(), json);
} catch (Descriptor.FormException e) {
throw new RuntimeException(e);
}
assertEquals("stores value", 5, c.getComputerRetentionCheckInterval());

GlobalComputerRetentionCheckIntervalConfiguration c2 = new GlobalComputerRetentionCheckIntervalConfiguration();
c2.load();
assertEquals("round trip value", 5, c2.getComputerRetentionCheckInterval());
assertEquals("no fallback message", 0, logging.getRecords().size());
}

private void checkSaveInvalidValueOf(int interval, String message) {
recordWarnings();
GlobalComputerRetentionCheckIntervalConfiguration c = new GlobalComputerRetentionCheckIntervalConfiguration();

JSONObject json = new JSONObject();
json.element("computerRetentionCheckInterval", interval);
try {
c.configure(Stapler.getCurrentRequest(), json);
throw new RuntimeException("expected .configure() to throw");
} catch (Descriptor.FormException e) {
assertEquals(e.getMessage(), message);
}
assertEquals("does not store value", 60, c.getComputerRetentionCheckInterval());

GlobalComputerRetentionCheckIntervalConfiguration c2 = new GlobalComputerRetentionCheckIntervalConfiguration();
c2.load();
assertEquals("does not persist value", 60, c2.getComputerRetentionCheckInterval());
assertEquals("no fallback message", 0, logging.getRecords().size());
}

@Test
public void saveInvalidValueTooLow() {
checkSaveInvalidValueOf(0, "java.lang.IllegalArgumentException: interval must be greater than zero");
}

@Test
public void saveInvalidValueTooHigh() {
checkSaveInvalidValueOf(1337, "java.lang.IllegalArgumentException: interval must be below or equal 60s");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<div>
Configure the check interval (in seconds) of computer retention strategies.
Retention strategies are configured in the individual agent configuration
under "Availability". Administrators can trade elevated CPU usage from
frequent agent checks for responsiveness to capacity needs.
<p>
For example, if you run your build agents on short-lived VMs that boot fast,
you may want to scale up new capacity as soon as builds queue up and scale
VMs down as soon as no more work is available to cut delays for users and
costs of keeping idle VMs alive. Be sure to adjust the delays in the
respective retention strategies accordingly as well.
</p>
</div>

0 comments on commit 16b5d21

Please sign in to comment.