From 5900663abe89bd8ca58139b833162080176214c2 Mon Sep 17 00:00:00 2001 From: Hannu Varjoranta Date: Mon, 24 Aug 2015 11:32:59 +0200 Subject: [PATCH] add debug for too many open files issue --- .../spotify/reaper/service/SegmentRunner.java | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/spotify/reaper/service/SegmentRunner.java b/src/main/java/com/spotify/reaper/service/SegmentRunner.java index 2c995249b..92cacdcc9 100644 --- a/src/main/java/com/spotify/reaper/service/SegmentRunner.java +++ b/src/main/java/com/spotify/reaper/service/SegmentRunner.java @@ -24,6 +24,7 @@ import com.spotify.reaper.cassandra.RepairStatusHandler; import com.spotify.reaper.core.RepairSegment; import com.spotify.reaper.core.RepairUnit; +import com.sun.management.UnixOperatingSystemMXBean; import org.apache.cassandra.repair.RepairParallelism; import org.apache.cassandra.service.ActiveRepairService; @@ -33,6 +34,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.lang.management.ManagementFactory; +import java.lang.management.OperatingSystemMXBean; +import java.net.SocketException; import java.util.Collection; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -121,6 +125,19 @@ public void postponeCurrentSegment() { } } + /** + * Temporary diagnostic, to be removed once the root cause of the too-many-open-files issue is + * found. Returns the open file descriptor count, or -1 if the OS bean is not a Unix one. 
+ */ + private long getOpenFilesAmount() { + OperatingSystemMXBean os = ManagementFactory.getOperatingSystemMXBean(); + long amountOfOpenFiles = -1; + if (os instanceof UnixOperatingSystemMXBean) { + amountOfOpenFiles = ((UnixOperatingSystemMXBean) os).getOpenFileDescriptorCount(); + } + return amountOfOpenFiles; + } + private void runRepair() { LOG.debug("Run repair for segment #{}", segmentId); final RepairSegment segment = context.storage.getRepairSegment(segmentId).get(); @@ -196,6 +213,7 @@ private void runRepair() { String msg = "Postponed a segment because no coordinator was reachable"; repairRunner.updateLastEvent(msg); postponeCurrentSegment(); + LOG.warn("Open files amount for process: " + getOpenFilesAmount()); } LOG.debug("Exiting synchronized section with segment ID {}", segmentId); } @@ -243,11 +261,10 @@ boolean canRepair(RepairSegment segment, String keyspace, JmxProxy coordinator) } catch (RuntimeException e) { LOG.warn("SegmentRunner declined to repair segment {} because of an error collecting " + "information from one of the hosts ({}): {}", segmentId, hostName, e); - String - msg = - String - .format("Postponed due to inability to collect information from host %s", hostName); + String msg = String.format("Postponed due to inability to collect " + + "information from host %s", hostName); repairRunner.updateLastEvent(msg); + LOG.warn("Open files amount for process: " + getOpenFilesAmount()); return false; } }