From 38d66291fc641e87ed3393b20ded338f0dfa6145 Mon Sep 17 00:00:00 2001 From: Keith Turner Date: Fri, 5 Apr 2024 18:24:58 -0400 Subject: [PATCH 1/3] catches exceptions and logs when planning compactions --- .../compaction/CompactionJobGenerator.java | 73 ++++++++++++------- 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/server/base/src/main/java/org/apache/accumulo/server/compaction/CompactionJobGenerator.java b/server/base/src/main/java/org/apache/accumulo/server/compaction/CompactionJobGenerator.java index 1d88de2eaaf..670ff35d518 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/compaction/CompactionJobGenerator.java +++ b/server/base/src/main/java/org/apache/accumulo/server/compaction/CompactionJobGenerator.java @@ -65,6 +65,7 @@ public class CompactionJobGenerator { private final PluginEnvironment env; private final Map> allExecutionHints; private final Cache,Long> unknownCompactionServiceErrorCache; + private final Cache generateJobsErrorCache; public CompactionJobGenerator(PluginEnvironment env, Map> executionHints) { @@ -87,42 +88,60 @@ public CompactionJobGenerator(PluginEnvironment env, unknownCompactionServiceErrorCache = Caches.getInstance().createNewBuilder(CacheName.COMPACTION_SERVICE_UNKNOWN, false) .expireAfterWrite(5, TimeUnit.MINUTES).build(); + + generateJobsErrorCache = + Caches.getInstance().createNewBuilder(CacheName.COMPACTION_SERVICE_UNKNOWN, false) + .expireAfterWrite(5, TimeUnit.MINUTES).build(); + } public Collection generateJobs(TabletMetadata tablet, Set kinds) { + try { + Collection systemJobs = Set.of(); - // ELASTICITY_TODO do not want user configured plugins to cause exceptions that prevents tablets - // from being - // assigned. So probably want to catch exceptions and log, but not too spammily OR some how - // report something - // back to the manager so it can log. - - Collection systemJobs = Set.of(); + if (kinds.contains(CompactionKind.SYSTEM)) { + CompactionServiceId serviceId = dispatch(CompactionKind.SYSTEM, tablet, Map.of()); + systemJobs = planCompactions(serviceId, CompactionKind.SYSTEM, tablet, Map.of()); + } - if (kinds.contains(CompactionKind.SYSTEM)) { - CompactionServiceId serviceId = dispatch(CompactionKind.SYSTEM, tablet, Map.of()); - systemJobs = planCompactions(serviceId, CompactionKind.SYSTEM, tablet, Map.of()); - } + Collection userJobs = Set.of(); - Collection userJobs = Set.of(); + if (kinds.contains(CompactionKind.USER) && tablet.getSelectedFiles() != null) { + var hints = allExecutionHints.get(tablet.getSelectedFiles().getFateId()); + if (hints != null) { + CompactionServiceId serviceId = dispatch(CompactionKind.USER, tablet, hints); + userJobs = planCompactions(serviceId, CompactionKind.USER, tablet, hints); + } + } - if (kinds.contains(CompactionKind.USER) && tablet.getSelectedFiles() != null) { - var hints = allExecutionHints.get(tablet.getSelectedFiles().getFateId()); - if (hints != null) { - CompactionServiceId serviceId = dispatch(CompactionKind.USER, tablet, hints); - userJobs = planCompactions(serviceId, CompactionKind.USER, tablet, hints); + if (userJobs.isEmpty()) { + return systemJobs; + } else if (systemJobs.isEmpty()) { + return userJobs; + } else { + var all = new ArrayList(systemJobs.size() + userJobs.size()); + all.addAll(systemJobs); + all.addAll(userJobs); + return all; + } + } catch (Exception e) { + // The same code that plans compactions also assigns tablets. The intent of this catch is + // mainly to defend against user plugins called here that throw an exception from negatively + // impacting tablet assignment. + String cacheKey = tablet.getTableId() + " " + e.getClass().getName(); + // This check defends against every tablet in a table having the same problem and therefore + // generating an enormous amount of spam for the logs. + var last = generateJobsErrorCache.getIfPresent(cacheKey); + if (last == null) { + log.error("Failed to generate compaction jobs for {}. Other tablets may be experiencing the" + + " same error, this log message is temporarily suppressed for the entire table.", + tablet.getExtent(), e); + generateJobsErrorCache.put(cacheKey, System.currentTimeMillis()); } - } - if (userJobs.isEmpty()) { - return systemJobs; - } else if (systemJobs.isEmpty()) { - return userJobs; - } else { - var all = new ArrayList(systemJobs.size() + userJobs.size()); - all.addAll(systemJobs); - all.addAll(userJobs); - return all; + log.trace("Failed to generate compaction jobs for {}", tablet.getExtent(), e); + + return Set.of(); } } From 091f521e24b2d2c5e1b2a2eaae30b984699da5bf Mon Sep 17 00:00:00 2001 From: Keith Turner Date: Fri, 5 Apr 2024 18:36:14 -0400 Subject: [PATCH 2/3] fix build error --- .../accumulo/server/compaction/CompactionJobGenerator.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/base/src/main/java/org/apache/accumulo/server/compaction/CompactionJobGenerator.java b/server/base/src/main/java/org/apache/accumulo/server/compaction/CompactionJobGenerator.java index 670ff35d518..f9ea9f9aab6 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/compaction/CompactionJobGenerator.java +++ b/server/base/src/main/java/org/apache/accumulo/server/compaction/CompactionJobGenerator.java @@ -133,8 +133,9 @@ public Collection generateJobs(TabletMetadata tablet, Set Date: Thu, 9 May 2024 18:19:49 -0400 Subject: [PATCH 3/3] code review update --- .../accumulo/server/compaction/CompactionJobGenerator.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/server/base/src/main/java/org/apache/accumulo/server/compaction/CompactionJobGenerator.java b/server/base/src/main/java/org/apache/accumulo/server/compaction/CompactionJobGenerator.java index f9ea9f9aab6..ac9b1c5c1c9 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/compaction/CompactionJobGenerator.java +++ b/server/base/src/main/java/org/apache/accumulo/server/compaction/CompactionJobGenerator.java @@ -131,13 +131,12 @@ public Collection generateJobs(TabletMetadata tablet, Set