Skip to content

Commit

Permalink
Merge pull request #91 from ahenry/issue89
Browse files: browse the repository at this point in the history
Fix bug that caused repair run to fail if nodes are going up or down
  • Loading branch information
Bj0rnen committed Apr 13, 2015
2 parents 9622f83 + a9ad7a1 commit 4533ef7
Showing 1 changed file with 18 additions and 2 deletions.
20 changes: 18 additions & 2 deletions src/main/java/com/spotify/reaper/service/SegmentRunner.java
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,18 @@ private void runRepair() {

boolean canRepair(RepairSegment segment, String keyspace, JmxProxy coordinator,
RepairRun repairRun) {
Collection<String> allHosts =
coordinator.tokenRangeToEndpoint(keyspace, segment.getTokenRange());
Collection<String> allHosts;
try {
// when hosts are coming up or going down, this method can throw an
// UndeclaredThrowableException
allHosts = coordinator.tokenRangeToEndpoint(keyspace, segment.getTokenRange());
} catch (RuntimeException e) {
LOG.warn("SegmentRunner couldn't get token ranges from coordinator: ", e);
String msg = String.format("SegmentRunner couldn't get token ranges from coordinator");
context.storage.updateRepairRun(repairRun.with().lastEvent(msg).build(repairRun.getId()));
return false;
}

for (String hostName : allHosts) {
LOG.debug("checking host '{}' for pending compactions and other repairs (can repair?)"
+ " Run id '{}'", hostName, segment.getRunId());
Expand Down Expand Up @@ -213,6 +223,12 @@ boolean canRepair(RepairSegment segment, String keyspace, JmxProxy coordinator,
String msg = String.format("Postponed due to inability to connect host %s", hostName);
context.storage.updateRepairRun(repairRun.with().lastEvent(msg).build(repairRun.getId()));
return false;
} catch (RuntimeException e) {
LOG.warn("SegmentRunner declined to repair segment {} because of an error collecting "
+ "information from one of the hosts ({}): {}", segmentId, hostName, e);
String msg = String.format("Postponed due to inability to collect information from host %s", hostName);
context.storage.updateRepairRun(repairRun.with().lastEvent(msg).build(repairRun.getId()));
return false;
}
}
LOG.info("It is ok to repair segment '{}' om repair run with id '{}'",
Expand Down

0 comments on commit 4533ef7

Please sign in to comment.