Skip to content

Commit

Permalink
[feature][broker]Provide new load balance placement strategy implemen…
Browse files Browse the repository at this point in the history
…tation based on the least resource usage with weight (apache#16281)

email discussion thread: https://lists.apache.org/thread/36vyyhndr4og175k2bz3mfdf5ctd2xky

### Motivation
See PIP-182 apache#16274 

### Modifications
See apache#16274 
 
The main idea of the new strategy is to unify the requirements of the load shedding strategy and the bundle placement strategy, both of which consider weighted resource usage, including historical observations.

How to calculate a score for a broker ?
- use its historical load and short-term load data with weight, which can solve the case of load jitter in a broker.

How to select a broker for assigning a bundle?
- select a broker based on which one has the least average resource usage with weight.
- picking randomly among the low-load brokers works better than always picking the single `minScore` broker; use `loadBalancerAverageResourceUsageDifferenceThresholdPercentage` to adjust the size of the `randomization pool`
  • Loading branch information
HQebupt authored Jul 12, 2022
1 parent 7c9ad1c commit d5088e3
Show file tree
Hide file tree
Showing 6 changed files with 271 additions and 7 deletions.
10 changes: 10 additions & 0 deletions conf/broker.conf
Original file line number Diff line number Diff line change
Expand Up @@ -1179,12 +1179,22 @@ defaultNamespaceBundleSplitAlgorithm=range_equally_divide
# load shedding strategy, support OverloadShedder and ThresholdShedder, default is ThresholdShedder since 2.10.0
loadBalancerLoadSheddingStrategy=org.apache.pulsar.broker.loadbalance.impl.ThresholdShedder

# load balance placement strategy, support LeastLongTermMessageRate and LeastResourceUsageWithWeight
loadBalancerLoadPlacementStrategy=org.apache.pulsar.broker.loadbalance.impl.LeastLongTermMessageRate

# The broker resource usage threshold.
# When the broker resource usage is greater than the pulsar cluster average resource usage,
# the threshold shedder will be triggered to offload bundles from the broker.
# It only takes effect in the ThresholdShedder strategy.
loadBalancerBrokerThresholdShedderPercentage=10

# The broker average resource usage difference threshold.
# Threshold on the difference from the average resource usage, used to decide whether a broker is a best candidate in LeastResourceUsageWithWeight.
# (eg: broker1 with 10% resource usage with weight and broker2 with 30% and broker3 with 80% will have 40% average resource usage.
# The placement strategy can select broker1 and broker2 as best candidates.)
# It only takes effect in the LeastResourceUsageWithWeight strategy.
loadBalancerAverageResourceUsageDifferenceThresholdPercentage=10

# Message-rate percentage threshold between highest and least loaded brokers for
# uniform load shedding. (eg: broker1 with 50K msgRate and broker2 with 30K msgRate
# will have 66% msgRate difference and load balancer can unload bundles from broker-1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1992,6 +1992,13 @@ public class ServiceConfiguration implements PulsarConfiguration {
)
private String loadBalancerLoadSheddingStrategy = "org.apache.pulsar.broker.loadbalance.impl.ThresholdShedder";

// Fully-qualified class name of the bundle placement strategy. ModularLoadManagerStrategy.create(conf)
// reads this value and instantiates the named class reflectively; the default keeps the pre-existing
// LeastLongTermMessageRate behavior.
@FieldContext(
category = CATEGORY_LOAD_BALANCER,
doc = "load balance placement strategy"
)
private String loadBalancerLoadPlacementStrategy =
"org.apache.pulsar.broker.loadbalance.impl.LeastLongTermMessageRate";

@FieldContext(
dynamic = true,
category = CATEGORY_LOAD_BALANCER,
Expand Down Expand Up @@ -2066,6 +2073,16 @@ public class ServiceConfiguration implements PulsarConfiguration {
)
private int loadBalancerBrokerThresholdShedderPercentage = 10;

// Expressed in percent: LeastResourceUsageWithWeight.selectBroker divides it by 100 and treats a broker as a
// best candidate when (broker weighted usage + threshold) <= average weighted usage of the candidates.
// A larger value therefore shrinks the pool of best candidates.
@FieldContext(
dynamic = true,
category = CATEGORY_LOAD_BALANCER,
doc = "Average resource usage difference threshold to determine a broker whether to be a best candidate in "
+ "LeastResourceUsageWithWeight.(eg: broker1 with 10% resource usage with weight "
+ "and broker2 with 30% and broker3 with 80% will have 40% average resource usage. "
+ "The placement strategy can select broker1 and broker2 as best candidates.)"
)
private int loadBalancerAverageResourceUsageDifferenceThresholdPercentage = 10;

@FieldContext(
dynamic = true,
category = CATEGORY_LOAD_BALANCER,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import java.util.Optional;
import java.util.Set;
import org.apache.pulsar.broker.ServiceConfiguration;
import org.apache.pulsar.broker.loadbalance.impl.LeastLongTermMessageRate;
import org.apache.pulsar.common.util.Reflections;
import org.apache.pulsar.policies.data.loadbalancer.BundleData;

/**
Expand Down Expand Up @@ -49,15 +49,17 @@ Optional<String> selectBroker(Set<String> candidates, BundleData bundleToAssign,
/**
* Create a placement strategy using the configuration.
*
* @param conf ServiceConfiguration to use.
* @return A placement strategy from the given configurations.
*/
static ModularLoadManagerStrategy create() {
static ModularLoadManagerStrategy create(final ServiceConfiguration conf) {
try {
// Only one strategy at the moment.
return new LeastLongTermMessageRate();
return Reflections.createInstance(conf.getLoadBalancerLoadPlacementStrategy(),
ModularLoadManagerStrategy.class, Thread.currentThread().getContextClassLoader());
} catch (Exception e) {
// Ignore
throw new RuntimeException(
"Could not load LoadBalancerLoadPlacementStrategy:" + conf.getLoadBalancerLoadPlacementStrategy(),
e);
}
return new LeastLongTermMessageRate();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pulsar.broker.loadbalance.impl;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;
import lombok.extern.slf4j.Slf4j;
import org.apache.pulsar.broker.ServiceConfiguration;
import org.apache.pulsar.broker.loadbalance.LoadData;
import org.apache.pulsar.broker.loadbalance.ModularLoadManagerStrategy;
import org.apache.pulsar.policies.data.loadbalancer.BrokerData;
import org.apache.pulsar.policies.data.loadbalancer.BundleData;
import org.apache.pulsar.policies.data.loadbalancer.LocalBrokerData;

/**
 * Placement strategy which selects a broker based on which one has the least resource usage with weight.
 * This strategy takes into account the historical load percentage and short-term load percentage, and thus will not
 * cause cluster fluctuations due to short-term load jitter.
 *
 * <p>The instance is stateful: it keeps one smoothed usage value per broker name across calls to
 * {@link #selectBroker}. Entries are never removed, so the map may contain brokers that are not candidates
 * of the current call. The class performs no synchronization and is not thread-safe.
 */
@Slf4j
public class LeastResourceUsageWithWeight implements ModularLoadManagerStrategy {
    // Maintain this list to reduce object creation.
    private final ArrayList<String> bestBrokers;
    // Smoothed (history-weighted) max resource usage per broker name, updated on every selection.
    private final Map<String, Double> brokerAvgResourceUsageWithWeight;

    public LeastResourceUsageWithWeight() {
        this.bestBrokers = new ArrayList<>();
        this.brokerAvgResourceUsageWithWeight = new HashMap<>();
    }

    /**
     * Refresh the smoothed max resource usage with weight of a broker and log a warning when it exceeds the
     * configured overload threshold.
     *
     * @param broker     the broker name.
     * @param brokerData the broker load data.
     * @param conf       the service configuration.
     * @return the smoothed max resource usage with weight of the broker, as a fraction (not a percentage).
     */
    private double getMaxResourceUsageWithWeight(final String broker, final BrokerData brokerData,
                                                 final ServiceConfiguration conf) {
        final double overloadThreshold = conf.getLoadBalancerBrokerOverloadedThresholdPercentage() / 100.0;
        final double maxUsageWithWeight =
                updateAndGetMaxResourceUsageWithWeight(broker, brokerData, conf);

        if (maxUsageWithWeight > overloadThreshold) {
            final LocalBrokerData localData = brokerData.getLocalData();
            log.warn(
                    "Broker {} is overloaded, max resource usage with weight percentage: {}%, "
                            + "CPU: {}%, MEMORY: {}%, DIRECT MEMORY: {}%, BANDWIDTH IN: {}%, "
                            + "BANDWIDTH OUT: {}%, CPU weight: {}, MEMORY weight: {}, DIRECT MEMORY weight: {}, "
                            + "BANDWIDTH IN weight: {}, BANDWIDTH OUT weight: {}",
                    broker, maxUsageWithWeight * 100,
                    localData.getCpu().percentUsage(), localData.getMemory().percentUsage(),
                    localData.getDirectMemory().percentUsage(), localData.getBandwidthIn().percentUsage(),
                    localData.getBandwidthOut().percentUsage(), conf.getLoadBalancerCPUResourceWeight(),
                    conf.getLoadBalancerMemoryResourceWeight(), conf.getLoadBalancerDirectMemoryResourceWeight(),
                    conf.getLoadBalancerBandwithInResourceWeight(),
                    conf.getLoadBalancerBandwithOutResourceWeight());
        }

        if (log.isDebugEnabled()) {
            log.debug("Broker {} has max resource usage with weight percentage: {}%",
                    brokerData.getLocalData().getWebServiceUrl(), maxUsageWithWeight * 100);
        }
        return maxUsageWithWeight;
    }

    /**
     * Update and get the max resource usage with weight of broker according to the service configuration.
     *
     * <p>The first observation of a broker is stored as-is. Later observations are blended with the stored
     * value as {@code history * p + (1 - p) * current}, where {@code p} is
     * {@code conf.getLoadBalancerHistoryResourcePercentage()}.
     *
     * @param broker     the broker name.
     * @param brokerData The broker load data.
     * @param conf       The service configuration.
     * @return the max resource usage with weight of broker
     */
    private double updateAndGetMaxResourceUsageWithWeight(String broker, BrokerData brokerData,
                                                          ServiceConfiguration conf) {
        final double historyPercentage = conf.getLoadBalancerHistoryResourcePercentage();
        Double historyUsage = brokerAvgResourceUsageWithWeight.get(broker);
        double resourceUsage = brokerData.getLocalData().getMaxResourceUsageWithWeight(
                conf.getLoadBalancerCPUResourceWeight(),
                conf.getLoadBalancerMemoryResourceWeight(),
                conf.getLoadBalancerDirectMemoryResourceWeight(),
                conf.getLoadBalancerBandwithInResourceWeight(),
                conf.getLoadBalancerBandwithOutResourceWeight());
        historyUsage = historyUsage == null
                ? resourceUsage : historyUsage * historyPercentage + (1 - historyPercentage) * resourceUsage;
        if (log.isDebugEnabled()) {
            log.debug(
                    "Broker {} get max resource usage with weight: {}, history resource percentage: {}%, CPU weight: "
                            + "{}, MEMORY weight: {}, DIRECT MEMORY weight: {}, BANDWIDTH IN weight: {}, BANDWIDTH "
                            + "OUT weight: {} ",
                    broker, historyUsage, historyPercentage, conf.getLoadBalancerCPUResourceWeight(),
                    conf.getLoadBalancerMemoryResourceWeight(), conf.getLoadBalancerDirectMemoryResourceWeight(),
                    conf.getLoadBalancerBandwithInResourceWeight(),
                    conf.getLoadBalancerBandwithOutResourceWeight());
        }
        brokerAvgResourceUsageWithWeight.put(broker, historyUsage);
        return historyUsage;
    }

    /**
     * Find a suitable broker to assign the given bundle to.
     * This method is not thread-safe.
     *
     * <p>Every candidate's smoothed usage is refreshed first. A candidate is a "best broker" when its usage plus
     * the configured difference threshold does not exceed the candidates' average usage. One best broker is
     * chosen at random; if none qualifies, one of the candidates is chosen at random.
     *
     * @param candidates     The candidates for which the bundle may be assigned.
     * @param bundleToAssign The data for the bundle to assign.
     * @param loadData       The load data from the leader broker.
     * @param conf           The service configuration.
     * @return The name of the selected broker as it appears on ZooKeeper, or empty when there are no candidates.
     */
    @Override
    public Optional<String> selectBroker(Set<String> candidates, BundleData bundleToAssign, LoadData loadData,
                                         ServiceConfiguration conf) {
        if (candidates.isEmpty()) {
            // Nothing to choose from; bail out before computing an average over zero brokers.
            log.error("There are no available brokers as candidates at this point for bundle: {}", bundleToAssign);
            return Optional.empty();
        }

        bestBrokers.clear();
        // Maintain of list of all the best scoring brokers and then randomly
        // select one of them at the end.
        double totalUsage = 0.0d;
        for (String broker : candidates) {
            BrokerData brokerData = loadData.getBrokerData().get(broker);
            totalUsage += getMaxResourceUsageWithWeight(broker, brokerData, conf);
        }

        final double avgUsage = totalUsage / candidates.size();
        final double diffThreshold =
                conf.getLoadBalancerAverageResourceUsageDifferenceThresholdPercentage() / 100.0;
        // Only the current candidates are eligible. The history map also holds brokers scored in earlier calls,
        // and those must never be returned when they are not part of this candidate set.
        for (String broker : candidates) {
            final Double avgResUsage = brokerAvgResourceUsageWithWeight.get(broker);
            if (avgResUsage != null && avgResUsage + diffThreshold <= avgUsage) {
                bestBrokers.add(broker);
            }
        }

        if (bestBrokers.isEmpty()) {
            // Assign randomly as all brokers are overloaded.
            log.warn("Assign randomly as all {} brokers are overloaded.", candidates.size());
            bestBrokers.addAll(candidates);
        }

        if (log.isDebugEnabled()) {
            log.debug("Selected {} best brokers: {} from candidate brokers: {}", bestBrokers.size(), bestBrokers,
                    candidates);
        }
        return Optional.of(bestBrokers.get(ThreadLocalRandom.current().nextInt(bestBrokers.size())));
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ public void initialize(final PulsarService pulsar) {
defaultStats.msgRateIn = DEFAULT_MESSAGE_RATE;
defaultStats.msgRateOut = DEFAULT_MESSAGE_RATE;

placementStrategy = ModularLoadManagerStrategy.create();
placementStrategy = ModularLoadManagerStrategy.create(conf);
policies = new SimpleResourceAllocationPolicies(pulsar);
filterPipeline.add(new BrokerVersionFilter());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import org.apache.pulsar.broker.ServiceConfiguration;
import org.apache.pulsar.broker.loadbalance.impl.LeastLongTermMessageRate;
import org.apache.pulsar.broker.loadbalance.impl.LeastResourceUsageWithWeight;
import org.apache.pulsar.policies.data.loadbalancer.LocalBrokerData;
import org.apache.pulsar.policies.data.loadbalancer.ResourceUsage;
import org.apache.pulsar.policies.data.loadbalancer.BrokerData;
Expand Down Expand Up @@ -59,6 +60,75 @@ public void testLeastLongTermMessageRate() {
assertEquals(strategy.selectBroker(brokerDataMap.keySet(), bundleData, loadData, conf), Optional.of("3"));
}

// Drives LeastResourceUsageWithWeight through five successive load rounds on brokers "1", "2" and "3"
// and checks which broker is selected after each round. The same strategy instance is reused, so every
// round is blended with the history of the previous ones (history percentage 0.5).
public void testLeastResourceUsageWithWeight() {
    final ServiceConfiguration conf = new ServiceConfiguration();
    conf.setLoadBalancerCPUResourceWeight(1.0);
    conf.setLoadBalancerMemoryResourceWeight(0.1);
    conf.setLoadBalancerDirectMemoryResourceWeight(0.1);
    conf.setLoadBalancerBandwithInResourceWeight(1.0);
    conf.setLoadBalancerBandwithOutResourceWeight(1.0);
    conf.setLoadBalancerHistoryResourcePercentage(0.5);
    conf.setLoadBalancerAverageResourceUsageDifferenceThresholdPercentage(5);

    final BundleData bundleData = new BundleData();
    final LoadData loadData = new LoadData();
    final Map<String, BrokerData> brokerDataMap = loadData.getBrokerData();
    final ModularLoadManagerStrategy strategy = new LeastResourceUsageWithWeight();

    // One row per round: usage reported by broker "1", "2" and "3" (limit is always 100).
    final double[][] usagePerRound = {
        {10, 30, 60},
        {20, 30, 50},
        {30, 30, 40},
        {30, 30, 40},
        {35, 20, 45},
    };
    // Broker expected to be the only best candidate after each round.
    // NOTE(review): presumably the weighted max usage equals usage/limit here because every resource
    // reports the same usage and the largest weight is 1.0 - confirm against LocalBrokerData.
    final String[] expectedPerRound = {"1", "1", "1", "1", "2"};

    for (int round = 0; round < usagePerRound.length; round++) {
        final double[] usages = usagePerRound[round];
        for (int idx = 0; idx < usages.length; idx++) {
            brokerDataMap.put(String.valueOf(idx + 1), initBrokerData(usages[idx], 100));
        }
        assertEquals(strategy.selectBroker(brokerDataMap.keySet(), bundleData, loadData, conf),
                Optional.of(expectedPerRound[round]));
    }
}

/**
 * Builds broker load data in which CPU, memory, direct memory and inbound/outbound bandwidth all report the
 * same usage and limit, with a fresh TimeAverageBrokerData attached.
 *
 * @param usage the usage value applied to every resource.
 * @param limit the limit value applied to every resource.
 * @return the populated BrokerData.
 */
private BrokerData initBrokerData(double usage, double limit) {
    final LocalBrokerData local = new LocalBrokerData();
    local.setCpu(new ResourceUsage(usage, limit));
    local.setMemory(new ResourceUsage(usage, limit));
    local.setDirectMemory(new ResourceUsage(usage, limit));
    local.setBandwidthIn(new ResourceUsage(usage, limit));
    local.setBandwidthOut(new ResourceUsage(usage, limit));

    final BrokerData result = new BrokerData(local);
    result.setTimeAverageData(new TimeAverageBrokerData());
    return result;
}

private BrokerData initBrokerData() {
LocalBrokerData localBrokerData = new LocalBrokerData();
localBrokerData.setCpu(new ResourceUsage());
Expand Down

0 comments on commit d5088e3

Please sign in to comment.