-
-
Notifications
You must be signed in to change notification settings - Fork 9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[FIXES JENKINS-28245] Allow finer-grained tuning of ChannelPinger. #1687
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,11 +34,11 @@ | |
import jenkins.slaves.PingFailureAnalyzer; | ||
|
||
import java.io.IOException; | ||
import java.util.Arrays; | ||
import java.util.concurrent.atomic.AtomicBoolean; | ||
import java.util.logging.Level; | ||
import java.util.logging.Logger; | ||
|
||
import static java.util.logging.Level.*; | ||
|
||
/** | ||
* Establish a periodic ping to keep connections between {@link Slave slaves} | ||
* and the main Jenkins node alive. This prevents network proxies from | ||
|
@@ -49,22 +49,43 @@ | |
@Extension | ||
public class ChannelPinger extends ComputerListener { | ||
private static final Logger LOGGER = Logger.getLogger(ChannelPinger.class.getName()); | ||
private static final String SYS_PROPERTY_NAME = ChannelPinger.class.getName() + ".pingInterval"; | ||
private static final String TIMEOUT_SECONDS_PROPERTY = ChannelPinger.class.getName() + ".pingTimeoutSeconds"; | ||
private static final String INTERVAL_MINUTES_PROPERTY = ChannelPinger.class.getName() + ".pingInterval"; | ||
private static final String INTERVAL_SECONDS_PROPERTY = ChannelPinger.class.getName() + ".pingIntervalSeconds"; | ||
|
||
/** | ||
* Interval for the ping in minutes. | ||
* Timeout for the ping in seconds. | ||
*/ | ||
private int pingInterval = 5; | ||
private final int pingTimeoutSeconds; | ||
|
||
/** | ||
* Interval for the ping in seconds. | ||
*/ | ||
private final int pingIntervalSeconds; | ||
|
||
public ChannelPinger() { | ||
String interval = System.getProperty(SYS_PROPERTY_NAME); | ||
if (interval != null) { | ||
try { | ||
pingInterval = Integer.valueOf(interval); | ||
} catch (NumberFormatException e) { | ||
LOGGER.warning("Ignoring invalid " + SYS_PROPERTY_NAME + "=" + interval); | ||
pingTimeoutSeconds = Integer.getInteger(TIMEOUT_SECONDS_PROPERTY, 4 * 60); | ||
|
||
// A little extra hoop-jumping to migrate from the old system property | ||
Integer intervalSeconds = Integer.getInteger(INTERVAL_SECONDS_PROPERTY); | ||
Integer intervalMinutes = Integer.getInteger(INTERVAL_MINUTES_PROPERTY); | ||
if (intervalMinutes != null) { | ||
LOGGER.warning("Property '" + INTERVAL_MINUTES_PROPERTY + "' is deprecated. Please migrate to '" + INTERVAL_SECONDS_PROPERTY + "'"); | ||
|
||
if (intervalSeconds != null) { | ||
LOGGER.log(Level.WARNING, "Ignoring {0}={1} because {2}={3}", | ||
new Object[] { INTERVAL_MINUTES_PROPERTY, intervalMinutes, INTERVAL_SECONDS_PROPERTY, intervalSeconds }); | ||
} else { | ||
intervalSeconds = intervalMinutes * 60; | ||
} | ||
} | ||
|
||
pingIntervalSeconds = intervalSeconds == null ? 5 * 60 : intervalSeconds; | ||
|
||
if (pingIntervalSeconds < pingTimeoutSeconds) { | ||
LOGGER.log(Level.WARNING, "Ping interval ({0}) is less than ping timeout ({1})", | ||
new Object[] { pingIntervalSeconds, pingTimeoutSeconds }); | ||
} | ||
} | ||
|
||
@Override | ||
|
@@ -73,52 +94,80 @@ public void preOnline(Computer c, Channel channel, FilePath root, TaskListener l | |
} | ||
|
||
public void install(Channel channel) { | ||
if (pingInterval < 1) { | ||
if (pingTimeoutSeconds < 1 || pingIntervalSeconds < 1) { | ||
LOGGER.fine("Slave ping is disabled"); | ||
return; | ||
} | ||
|
||
try { | ||
channel.call(new SetUpRemotePing(pingInterval)); | ||
channel.call(new SetUpRemotePing(pingTimeoutSeconds, pingIntervalSeconds)); | ||
LOGGER.fine("Set up a remote ping for " + channel.getName()); | ||
} catch (Exception e) { | ||
LOGGER.severe("Failed to set up a ping for " + channel.getName()); | ||
} | ||
|
||
// set up ping from both directions, so that in case of a router dropping a connection, | ||
// both sides can notice it and take compensation actions. | ||
setUpPingForChannel(channel, pingInterval); | ||
setUpPingForChannel(channel, pingTimeoutSeconds, pingIntervalSeconds); | ||
} | ||
|
||
private static class SetUpRemotePing extends MasterToSlaveCallable<Void, IOException> { | ||
private static final long serialVersionUID = -2702219700841759872L; | ||
private int pingInterval; | ||
public SetUpRemotePing(int pingInterval) { | ||
this.pingInterval = pingInterval; | ||
static class SetUpRemotePing extends MasterToSlaveCallable<Void, IOException> { | ||
private final int pingTimeoutSeconds; | ||
private final int pingIntervalSeconds; | ||
|
||
SetUpRemotePing(int pingTimeoutSeconds, int pingIntervalSeconds) { | ||
this.pingTimeoutSeconds = pingTimeoutSeconds; | ||
this.pingIntervalSeconds = pingIntervalSeconds; | ||
} | ||
|
||
@Override | ||
public Void call() throws IOException { | ||
setUpPingForChannel(Channel.current(), pingInterval); | ||
setUpPingForChannel(Channel.current(), pingTimeoutSeconds, pingIntervalSeconds); | ||
return null; | ||
} | ||
|
||
@Override | ||
public boolean equals(Object o) { | ||
if (this == o) { | ||
return true; | ||
} | ||
if (o == null || getClass() != o.getClass()) { | ||
return false; | ||
} | ||
|
||
SetUpRemotePing that = (SetUpRemotePing) o; | ||
return this.pingTimeoutSeconds == that.pingTimeoutSeconds | ||
&& this.pingIntervalSeconds == that.pingIntervalSeconds; | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
// TODO(deadmoose): switch to Objects.hash once Java 7's fully available | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would have thought a simple arithmetic expression with a prime would work here (such as is auto generated by eclipse) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In this case it's really just two ints, so I could make up a magic number and glom them together, but given the language provides a convenient (and soon even more convenient) method to Just Do It, I lean toward punting the maintenance burden to the language team |
||
return Arrays.hashCode(new Object[] { pingTimeoutSeconds, pingIntervalSeconds }); | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return "SetUpRemotePing(" + pingTimeoutSeconds + "," + pingIntervalSeconds + ")"; | ||
} | ||
} | ||
|
||
private static void setUpPingForChannel(final Channel channel, int interval) { | ||
static void setUpPingForChannel(final Channel channel, int timeoutSeconds, int intervalSeconds) { | ||
final AtomicBoolean isInClosed = new AtomicBoolean(false); | ||
final PingThread t = new PingThread(channel, interval * 60 * 1000) { | ||
final PingThread t = new PingThread(channel, timeoutSeconds * 1000L, intervalSeconds * 1000L) { | ||
protected void onDead(Throwable cause) { | ||
try { | ||
for (PingFailureAnalyzer pfa : PingFailureAnalyzer.all()) { | ||
pfa.onPingFailure(channel,cause); | ||
} | ||
if (isInClosed.get()) { | ||
LOGGER.log(FINE,"Ping failed after the channel "+channel.getName()+" is already partially closed.",cause); | ||
LOGGER.log(Level.FINE,"Ping failed after the channel "+channel.getName()+" is already partially closed.",cause); | ||
} else { | ||
LOGGER.log(INFO,"Ping failed. Terminating the channel "+channel.getName()+".",cause); | ||
LOGGER.log(Level.INFO,"Ping failed. Terminating the channel "+channel.getName()+".",cause); | ||
channel.close(cause); | ||
} | ||
} catch (IOException e) { | ||
LOGGER.log(SEVERE,"Failed to terminate the channel "+channel.getName(),e); | ||
LOGGER.log(Level.SEVERE,"Failed to terminate the channel "+channel.getName(),e); | ||
} | ||
} | ||
protected void onDead() { | ||
|
@@ -136,6 +185,7 @@ public void onClosed(Channel channel, IOException cause) { | |
}); | ||
|
||
t.start(); | ||
LOGGER.fine("Ping thread started for " + channel + " with a " + interval + " minute interval"); | ||
LOGGER.log(Level.FINE, "Ping thread started for {0} with a {1} second interval and a {2} second timeout.", | ||
new Object[] { channel, intervalSeconds, timeoutSeconds }); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
package hudson.slaves; | ||
|
||
import static org.mockito.Matchers.eq; | ||
import static org.mockito.Mockito.verify; | ||
import static org.mockito.Mockito.when; | ||
import static org.powermock.api.mockito.PowerMockito.mockStatic; | ||
import static org.powermock.api.mockito.PowerMockito.verifyNoMoreInteractions; | ||
import static org.powermock.api.mockito.PowerMockito.verifyStatic; | ||
|
||
import com.google.common.testing.EqualsTester; | ||
import org.junit.After; | ||
import org.junit.Before; | ||
import org.junit.Test; | ||
import org.junit.runner.RunWith; | ||
import org.mockito.Mock; | ||
import org.mockito.MockitoAnnotations; | ||
import org.powermock.api.mockito.PowerMockito; | ||
import org.powermock.core.classloader.annotations.PrepareForTest; | ||
import org.powermock.modules.junit4.PowerMockRunner; | ||
import hudson.remoting.Channel; | ||
|
||
import java.util.Map; | ||
import java.util.HashMap; | ||
|
||
@RunWith(PowerMockRunner.class) | ||
@PrepareForTest({ ChannelPinger.class }) | ||
public class ChannelPingerTest { | ||
|
||
@Mock private Channel mockChannel; | ||
|
||
private Map<String, String> savedSystemProperties = new HashMap<String, String>(); | ||
|
||
@Before | ||
public void setUp() throws Exception { | ||
MockitoAnnotations.initMocks(this); | ||
mockStatic(ChannelPinger.class); | ||
} | ||
|
||
@Before | ||
public void preserveSystemProperties() throws Exception { | ||
preserveSystemProperty("hudson.slaves.ChannelPinger.pingInterval"); | ||
preserveSystemProperty("hudson.slaves.ChannelPinger.pingIntervalSeconds"); | ||
preserveSystemProperty("hudson.slaves.ChannelPinger.pingTimeoutSeconds"); | ||
} | ||
|
||
@After | ||
public void restoreSystemProperties() throws Exception { | ||
for (Map.Entry<String, String> entry : savedSystemProperties.entrySet()) { | ||
if (entry.getValue() != null) { | ||
System.setProperty(entry.getKey(), entry.getValue()); | ||
} else { | ||
System.clearProperty(entry.getKey()); | ||
} | ||
} | ||
} | ||
|
||
private void preserveSystemProperty(String propertyName) { | ||
savedSystemProperties.put(propertyName, System.getProperty(propertyName)); | ||
System.clearProperty(propertyName); | ||
} | ||
|
||
@Test | ||
public void testDefaults() throws Exception { | ||
ChannelPinger channelPinger = new ChannelPinger(); | ||
channelPinger.install(mockChannel); | ||
|
||
verify(mockChannel).call(eq(new ChannelPinger.SetUpRemotePing(240, 300))); | ||
verifyStatic(); | ||
ChannelPinger.setUpPingForChannel(mockChannel, 240, 300); | ||
} | ||
|
||
@Test | ||
public void testFromSystemProperties() throws Exception { | ||
System.setProperty("hudson.slaves.ChannelPinger.pingTimeoutSeconds", "42"); | ||
System.setProperty("hudson.slaves.ChannelPinger.pingIntervalSeconds", "73"); | ||
|
||
ChannelPinger channelPinger = new ChannelPinger(); | ||
channelPinger.install(mockChannel); | ||
|
||
verify(mockChannel).call(new ChannelPinger.SetUpRemotePing(42, 73)); | ||
verifyStatic(); | ||
ChannelPinger.setUpPingForChannel(mockChannel, 42, 73); | ||
} | ||
|
||
@Test | ||
public void testFromOldSystemProperty() throws Exception { | ||
System.setProperty("hudson.slaves.ChannelPinger.pingInterval", "7"); | ||
|
||
ChannelPinger channelPinger = new ChannelPinger(); | ||
channelPinger.install(mockChannel); | ||
|
||
verify(mockChannel).call(eq(new ChannelPinger.SetUpRemotePing(240, 420))); | ||
verifyStatic(); | ||
ChannelPinger.setUpPingForChannel(mockChannel, 240, 420); | ||
} | ||
|
||
@Test | ||
public void testNewSystemPropertyTrumpsOld() throws Exception { | ||
System.setProperty("hudson.slaves.ChannelPinger.pingIntervalSeconds", "73"); | ||
System.setProperty("hudson.slaves.ChannelPinger.pingInterval", "7"); | ||
|
||
ChannelPinger channelPinger = new ChannelPinger(); | ||
channelPinger.install(mockChannel); | ||
|
||
verify(mockChannel).call(eq(new ChannelPinger.SetUpRemotePing(240, 73))); | ||
verifyStatic(); | ||
ChannelPinger.setUpPingForChannel(mockChannel, 240, 73); | ||
} | ||
|
||
@Test | ||
public void testSetUpRemotePingEquality() { | ||
new EqualsTester() | ||
.addEqualityGroup(new ChannelPinger.SetUpRemotePing(1, 2), new ChannelPinger.SetUpRemotePing(1, 2)) | ||
.addEqualityGroup(new ChannelPinger.SetUpRemotePing(2, 3), new ChannelPinger.SetUpRemotePing(2, 3)) | ||
.testEquals(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -96,6 +96,7 @@ THE SOFTWARE. | |
<project.patchManagement.url>https://api.github.com</project.patchManagement.url> | ||
<patch.tracker.serverId>jenkins-jira</patch.tracker.serverId> | ||
|
||
<guavaVersion>11.0.1</guavaVersion> | ||
<slf4jVersion>1.7.7</slf4jVersion> <!-- < 1.6.x version didn't specify the license (MIT) --> | ||
<maven-plugin.version>2.7.1</maven-plugin.version> | ||
<matrix-project.version>1.4.1</matrix-project.version> | ||
|
@@ -182,7 +183,12 @@ THE SOFTWARE. | |
<dependency> | ||
<groupId>com.google.guava</groupId> | ||
<artifactId>guava</artifactId> | ||
<version>11.0.1</version> | ||
<version>${guavaVersion}</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>com.google.guava</groupId> | ||
<artifactId>guava-testlib</artifactId> | ||
<version>${guavaVersion}</version> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If I get the code correctly, it should be included in the test scope only There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See the above comments on the previous version where I'd had a mental hiccup and done just that. This is the big block that's specifying versions of things, and then it's scoped to test down in in core/pom.xml where it's actually consumed. (And then, anecdotally, it appears scoping inside dependencyManagement is completely ignored since core/pom.xml still successfully picked up a version number from the supposedly scoped definition here) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed |
||
</dependency> | ||
|
||
<dependency> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think (but may be mistaken) that this should have a serialVersionUID defined.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As mentioned in the change description, it really isn't; these don't get persisted and re-read by potentially differently-compiled code, and remoting is already taking care of making master/slave run the same compiled bits with the JVM's same auto-rolled UID.
There are already plenty of Callables demonstrating this if you grep for MasterToSlaveCallable, e.g.:
https://github.com/jenkinsci/jenkins/blob/master/core/src/main/java/jenkins/util/VirtualFile.java#L417-L438
https://github.com/jenkinsci/jenkins/blob/master/core/src/main/java/jenkins/slaves/restarter/JnlpSlaveRestarterInstaller.java#L52-L92
https://github.com/jenkinsci/jenkins/blob/master/core/src/main/java/jenkins/slaves/StandardOutputSwapper.java#L38-L74
https://github.com/jenkinsci/jenkins/blob/master/core/src/main/java/jenkins/model/Jenkins.java#L2048-L2052
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are issues with differences in JVM versions between master and slave. I have customer issues where not having a serialVersionUID defined has caused Jenkins to blow up (thankfully all of these cases were in plugins). Just because some instances of callables in Core have been lucky so far does not mean it is a safe pattern to follow.
In addition, assuming that the Jenkins core implementation of Remoting (with its current contract of classloaders) is the only implementation is a risky assumption. We have an alternative implementation for Master-to-Master communication which (for security reasons) does not allow remote class loading and as such the serialVersionUID must match for any callables that are transferred over that Channel. While we are not in a position to release that implementation at the present time, I know it is on our roadmap to release it in the future. It would be better if the habit of serialVersionUID alway was better applied in Core so that when we do open source this code we can set an earlier lower-bound on the Master version that the code applies to.