Skip to content

Commit

Permalink
[7.0.0] Generalize memory and cpu tracking (#20396)
Browse files Browse the repository at this point in the history
Remove special handling of cpu and memory, and instead track them as any
other resource. This has a few advantages:
- Significant code clean up, keep only the logic for generic resource
around
  - Consistent resources type (double) for all resource

Relates to #19679

Closes #19839.

Commit
ebe4d0d

PiperOrigin-RevId: 585926931
Change-Id: Id0d6a14e9c151c3895f55d1808b6bd66eecf98b3

Co-authored-by: Alessandro Patti <[email protected]>
  • Loading branch information
iancha1992 and AlessandroPatti authored Nov 30, 2023
1 parent 789feee commit 0725711
Show file tree
Hide file tree
Showing 17 changed files with 238 additions and 327 deletions.
173 changes: 65 additions & 108 deletions src/main/java/com/google/devtools/build/lib/actions/ResourceManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
import com.google.devtools.build.lib.profiler.AutoProfiler;
import com.google.devtools.build.lib.profiler.ProfilerTask;
Expand Down Expand Up @@ -139,23 +140,23 @@ public static ResourceManager instance() {
/** Returns prediction of RAM in Mb used by registered actions. */
@Override
public double getUsedMemoryInMb() {
return usedRam;
return usedResources.get(ResourceSet.MEMORY);
}

/** Returns prediction of CPUs used by registered actions. */
@Override
public double getUsedCPU() {
return usedCpu;
return usedResources.get(ResourceSet.CPU);
}

// Allocated resources are allowed to go "negative", but at least
// MIN_AVAILABLE_CPU_RATIO portion of CPU and MIN_AVAILABLE_RAM_RATIO portion
// of RAM should be available.
// MIN_NECESSARY_RATIO portion of each resource should be available.
// Please note that this value is purely empirical - we assume that generally
// requested resources are somewhat pessimistic and thread would end up
// using less than requested amount.
private static final double MIN_NECESSARY_CPU_RATIO = 0.6;
private static final double MIN_NECESSARY_RAM_RATIO = 1.0;
private static final Double DEFAULT_MIN_NECESSARY_RATIO = 1.0;
private static final ImmutableMap<String, Double> MIN_NECESSARY_RATIO =
ImmutableMap.of(ResourceSet.CPU, 0.6);

// Lists of blocked threads. Associated CountDownLatch object will always
// be initialized to 1 during creation in the acquire() method.
Expand All @@ -179,18 +180,9 @@ public double getUsedCPU() {
// LocalHostCapacity.getLocalHostCapacity() as an argument.
@VisibleForTesting public ResourceSet availableResources = null;

// Used amount of CPU capacity (where 1.0 corresponds to the one fully
// occupied CPU core. Corresponds to the CPU resource definition in the
// ResourceSet class.
private double usedCpu;

// Used amount of RAM capacity in MB. Corresponds to the RAM resource
// definition in the ResourceSet class.
private double usedRam;

// Used amount of extra resources. Corresponds to the extra resource
// Used amount of resources. Corresponds to the resource
// definition in the ResourceSet class.
private Map<String, Float> usedExtraResources;
private Map<String, Double> usedResources;

// Used local test count. Corresponds to the local test count definition in the ResourceSet class.
private int usedLocalTestCount;
Expand All @@ -206,9 +198,7 @@ public static ResourceManager instanceForTestingOnly() {
* <p>Note - it does not reset available resources. Use separate call to setAvailableResources().
*/
public synchronized void resetResourceUsage() {
usedCpu = 0;
usedRam = 0;
usedExtraResources = new HashMap<>();
usedResources = new HashMap<>();
usedLocalTestCount = 0;
for (Pair<ResourceSet, LatchWithWorker> request : localRequests) {
request.second.latch.countDown();
Expand Down Expand Up @@ -298,20 +288,14 @@ public ResourceHandle acquireResources(
@Nullable
private Worker incrementResources(ResourceSet resources)
throws IOException, InterruptedException {
usedCpu += resources.getCpuUsage();
usedRam += resources.getMemoryMb();

resources
.getExtraResourceUsage()
.entrySet()
.getResources()
.forEach(
resource -> {
String key = (String) resource.getKey();
float value = resource.getValue();
if (usedExtraResources.containsKey(key)) {
value += (float) usedExtraResources.get(key);
(key, value) -> {
if (usedResources.containsKey(key)) {
value += usedResources.get(key);
}
usedExtraResources.put(key, value);
usedResources.put(key, value);
});

usedLocalTestCount += resources.getLocalTestCount();
Expand All @@ -324,9 +308,7 @@ private Worker incrementResources(ResourceSet resources)

/** Return true if any resources have been claimed through this manager. */
public synchronized boolean inUse() {
return usedCpu != 0.0
|| usedRam != 0.0
|| !usedExtraResources.isEmpty()
return !usedResources.isEmpty()
|| usedLocalTestCount != 0
|| !localRequests.isEmpty()
|| !dynamicWorkerRequests.isEmpty()
Expand Down Expand Up @@ -428,32 +410,24 @@ private boolean release(ResourceSet resources, @Nullable Worker worker)
}

private synchronized void releaseResourcesOnly(ResourceSet resources) {
usedCpu -= resources.getCpuUsage();
usedRam -= resources.getMemoryMb();

usedLocalTestCount -= resources.getLocalTestCount();

// TODO(bazel-team): (2010) rounding error can accumulate and value below can end up being
// e.g. 1E-15. So if it is small enough, we set it to 0. But maybe there is a better solution.
double epsilon = 0.0001;
if (usedCpu < epsilon) {
usedCpu = 0;
}
if (usedRam < epsilon) {
usedRam = 0;
}

Set<String> toRemove = new HashSet<>();
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
String key = (String) resource.getKey();
float value = (float) usedExtraResources.get(key) - resource.getValue();
usedExtraResources.put(key, value);
for (Map.Entry<String, Double> resource : resources.getResources().entrySet()) {
String key = resource.getKey();
double value = usedResources.getOrDefault(key, 0.0) - resource.getValue();
usedResources.put(key, value);
if (value < epsilon) {
toRemove.add(key);
}
}
usedResources.keySet().removeAll(toRemove);
for (String key : toRemove) {
usedExtraResources.remove(key);
usedResources.remove(key);
}
}

Expand Down Expand Up @@ -494,29 +468,27 @@ private synchronized void processWaitingThreads(Deque<Pair<ResourceSet, LatchWit
}

/** Throws an exception if requested extra resource isn't being tracked */
private void assertExtraResourcesTracked(ResourceSet resources) throws NoSuchElementException {
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
String key = (String) resource.getKey();
if (!availableResources.getExtraResourceUsage().containsKey(key)) {
private void assertResourcesTracked(ResourceSet resources) throws NoSuchElementException {
for (Map.Entry<String, Double> resource : resources.getResources().entrySet()) {
String key = resource.getKey();
if (!availableResources.getResources().containsKey(key)) {
throw new NoSuchElementException(
"Resource " + key + " is not tracked in this resource set.");
}
}
}

/** Return true iff all requested extra resources are considered to be available. */
private boolean areExtraResourcesAvailable(ResourceSet resources) throws NoSuchElementException {
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
String key = (String) resource.getKey();
float used = (float) usedExtraResources.getOrDefault(key, 0f);
float requested = resource.getValue();
float available = availableResources.getExtraResourceUsage().get(key);
float epsilon = 0.0001f; // Account for possible rounding errors.
if (requested != 0.0 && used != 0.0 && requested + used > available + epsilon) {
return false;
}
}
return true;
private static <T extends Number> boolean isAvailable(T available, T used, T requested) {
// Resources are considered available if any one of the conditions below is true:
// 1) If resource is not requested at all, it is available.
// 2) If resource is not used at the moment, it is considered to be
// available regardless of how much is requested. This is necessary to
// ensure that at any given time, at least one thread is able to acquire
// resources even if it requests more than available.
// 3) If used resource amount is less than total available resource amount.
return requested.doubleValue() == 0
|| used.doubleValue() == 0
|| used.doubleValue() + requested.doubleValue() <= available.doubleValue();
}

// Method will return true if all requested resources are considered to be available.
Expand All @@ -527,59 +499,44 @@ boolean areResourcesAvailable(ResourceSet resources) throws NoSuchElementExcepti
// by the release() method.

WorkerKey workerKey = resources.getWorkerKey();
int availableWorkers = 0;
int activeWorkers = 0;
if (workerKey != null) {
availableWorkers = this.workerPool.getMaxTotalPerKey(workerKey);
activeWorkers = this.workerPool.getNumActive(workerKey);
int availableWorkers = this.workerPool.getMaxTotalPerKey(workerKey);
int activeWorkers = this.workerPool.getNumActive(workerKey);
if (activeWorkers >= availableWorkers) {
return false;
}
}
boolean workerIsAvailable = workerKey == null || (activeWorkers < availableWorkers);

// We test for tracking of extra resources whenever acquired and throw an
// exception before acquiring any untracked resource.
assertExtraResourcesTracked(resources);
assertResourcesTracked(resources);

if (usedCpu == 0.0
&& usedRam == 0.0
&& usedExtraResources.isEmpty()
&& usedLocalTestCount == 0
&& workerIsAvailable) {
if (usedResources.isEmpty() && usedLocalTestCount == 0) {
return true;
}
// Use only MIN_NECESSARY_???_RATIO of the resource value to check for
// allocation. This is necessary to account for the fact that most of the
// requested resource sets use pessimistic estimations. Note that this
// ratio is used only during comparison - for tracking we will actually
// mark whole requested amount as used.
double cpu = resources.getCpuUsage() * MIN_NECESSARY_CPU_RATIO;
double ram = resources.getMemoryMb() * MIN_NECESSARY_RAM_RATIO;
int localTestCount = resources.getLocalTestCount();

double availableCpu = availableResources.getCpuUsage();
double availableRam = availableResources.getMemoryMb();
int availableLocalTestCount = availableResources.getLocalTestCount();

double remainingRam = availableRam - usedRam;
int availableLocalTestCount = availableResources.getLocalTestCount();
if (!isAvailable(availableLocalTestCount, usedLocalTestCount, resources.getLocalTestCount())) {
return false;
}

// Resources are considered available if any one of the conditions below is true:
// 1) If resource is not requested at all, it is available.
// 2) If resource is not used at the moment, it is considered to be
// available regardless of how much is requested. This is necessary to
// ensure that at any given time, at least one thread is able to acquire
// resources even if it requests more than available.
// 3) If used resource amount is less than total available resource amount.
boolean cpuIsAvailable = cpu == 0.0 || usedCpu == 0.0 || usedCpu + cpu <= availableCpu;
boolean ramIsAvailable = ram == 0.0 || usedRam == 0.0 || ram <= remainingRam;
boolean localTestCountIsAvailable =
localTestCount == 0
|| usedLocalTestCount == 0
|| usedLocalTestCount + localTestCount <= availableLocalTestCount;
boolean extraResourcesIsAvailable = areExtraResourcesAvailable(resources);
return cpuIsAvailable
&& ramIsAvailable
&& extraResourcesIsAvailable
&& localTestCountIsAvailable
&& workerIsAvailable;
for (Map.Entry<String, Double> resource : resources.getResources().entrySet()) {
String key = resource.getKey();

// Use only MIN_NECESSARY_RATIO of the resource value to check for
// allocation. This is necessary to account for the fact that most of the
// requested resource sets use pessimistic estimations. Note that this
// ratio is used only during comparison - for tracking we will actually
// mark whole requested amount as used.
double requested =
resource.getValue() * MIN_NECESSARY_RATIO.getOrDefault(key, DEFAULT_MIN_NECESSARY_RATIO);
double used = usedResources.getOrDefault(key, 0.0);
double available = availableResources.get(key);
if (!isAvailable(available, used, requested)) {
return false;
}
}
return true;
}

@VisibleForTesting
Expand Down
Loading

0 comments on commit 0725711

Please sign in to comment.