diff --git a/cuebot/src/main/java/com/imageworks/spcue/ServiceEntity.java b/cuebot/src/main/java/com/imageworks/spcue/ServiceEntity.java index 0bb47c02c..72b134f7d 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/ServiceEntity.java +++ b/cuebot/src/main/java/com/imageworks/spcue/ServiceEntity.java @@ -54,12 +54,12 @@ public class ServiceEntity extends Entity { /** * Determines the default minimum memory per frame. */ - public long minMemory = Dispatcher.MEM_RESERVED_DEFAULT; + public long minMemory = Dispatcher.MEM_SERVICE_RESERVED_DEFAULT; /** * Determines the default minimum gpu per frame. */ - public long minGpuMemory = Dispatcher.MEM_GPU_RESERVED_DEFAULT; + public long minGpuMemory = Dispatcher.MEM_SERVICE_GPU_RESERVED_DEFAULT; /** * Determines the default tags. diff --git a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java index 8205f3021..e89ced6ce 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java @@ -148,7 +148,7 @@ else if (proc.coresReserved >= 100) { proc.coresReserved = wholeCores * 100; } else { if (frame.threadable) { - if (selfishServices != null && + if (selfishServices != null && frame.services != null && containsSelfishService(frame.services.split(","), selfishServices)){ proc.coresReserved = wholeCores * 100; diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java index 223737042..244a4778b 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java @@ -30,6 +30,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.env.Environment; import 
org.springframework.dao.EmptyResultDataAccessException; import org.springframework.jdbc.core.CallableStatementCreator; import org.springframework.jdbc.core.RowMapper; @@ -58,6 +60,9 @@ public class HostDaoJdbc extends JdbcDaoSupport implements HostDao { + @Autowired + private Environment env; + public static final RowMapper HOST_DETAIL_MAPPER = new RowMapper() { public HostEntity mapRow(ResultSet rs, int rowNum) throws SQLException { HostEntity host = new HostEntity(); @@ -324,9 +329,12 @@ public void insertRenderHost(RenderHost host, AllocationInterface a, boolean use } long memUnits = convertMemoryUnits(host); - if (memUnits < Dispatcher.MEM_RESERVED_MIN) { + long memReserverMin = env.getRequiredProperty( + "dispatcher.memory.mem_reserved_min", + Long.class); + if (memUnits < memReserverMin) { throw new EntityCreationError("could not create host " + host.getName() + ", " + - " must have at least " + Dispatcher.MEM_RESERVED_MIN + " free memory."); + " must have at least " + memReserverMin + " free memory."); } String fqdn; @@ -727,10 +735,10 @@ private long convertMemoryUnits(RenderHost host) { long memUnits; if (host.getTagsList().contains("64bit")) { - memUnits = CueUtil.convertKbToFakeKb64bit(host.getTotalMem()); + memUnits = CueUtil.convertKbToFakeKb64bit(env, host.getTotalMem()); } else { - memUnits = CueUtil.convertKbToFakeKb32bit(host.getTotalMem()); + memUnits = CueUtil.convertKbToFakeKb32bit(env, host.getTotalMem()); } /* @@ -738,7 +746,10 @@ private long convertMemoryUnits(RenderHost host) { * so we don't annoy the user. 
*/ if (host.getNimbyEnabled()) { - memUnits = (long) (memUnits / 1.5) + Dispatcher.MEM_RESERVED_SYSTEM; + long memReservedSystem = env.getRequiredProperty( + "dispatcher.memory.mem_reserved_system", + Long.class); + memUnits = (long) (memUnits / 1.5) + memReservedSystem; } return memUnits; diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java index 78753f578..a78475c46 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java @@ -33,6 +33,8 @@ import org.springframework.dao.EmptyResultDataAccessException; import org.springframework.jdbc.core.RowMapper; import org.springframework.jdbc.core.support.JdbcDaoSupport; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.env.Environment; import com.imageworks.spcue.ExecutionSummary; import com.imageworks.spcue.FrameStateTotals; @@ -41,7 +43,6 @@ import com.imageworks.spcue.LayerEntity; import com.imageworks.spcue.LayerInterface; import com.imageworks.spcue.LimitEntity; -import com.imageworks.spcue.LimitInterface; import com.imageworks.spcue.ResourceUsage; import com.imageworks.spcue.ThreadStats; import com.imageworks.spcue.dao.LayerDao; @@ -56,6 +57,7 @@ import org.apache.logging.log4j.LogManager; public class LayerDaoJdbc extends JdbcDaoSupport implements LayerDao { + private final long MEM_RESERVED_MIN; private static final Logger logger = LogManager.getLogger(LayerDaoJdbc.class); private static final String INSERT_OUTPUT_PATH = "INSERT INTO " + @@ -67,6 +69,14 @@ public class LayerDaoJdbc extends JdbcDaoSupport implements LayerDao { "str_filespec " + ") VALUES (?,?,?,?)"; + @Autowired + public LayerDaoJdbc(Environment env) { + this.MEM_RESERVED_MIN = env.getRequiredProperty( + "dispatcher.memory.mem_reserved_min", + Long.class + ); + } + @Override public void 
insertLayerOutput(LayerInterface layer, String filespec) { getJdbcTemplate().update( @@ -341,8 +351,8 @@ public void insertLayerDetail(LayerDetail l) { @Override public void updateLayerMinMemory(LayerInterface layer, long val) { - if (val < Dispatcher.MEM_RESERVED_MIN) { - val = Dispatcher.MEM_RESERVED_MIN; + if (val < MEM_RESERVED_MIN) { + val = MEM_RESERVED_MIN; } getJdbcTemplate().update("UPDATE layer SET int_mem_min=? WHERE pk_layer=?", val, layer.getLayerId()); @@ -380,8 +390,8 @@ public boolean balanceLayerMinMemory(LayerInterface layer, long frameMaxRss) { if (maxrss < frameMaxRss) { maxrss = frameMaxRss; } - if (maxrss < Dispatcher.MEM_RESERVED_MIN) { - maxrss = Dispatcher.MEM_RESERVED_MIN; + if (maxrss < MEM_RESERVED_MIN) { + maxrss = MEM_RESERVED_MIN; } else { maxrss = maxrss + CueUtil.MB256; } @@ -603,11 +613,11 @@ public long findPastMaxRSS(JobInterface job, String name) { try { long maxRss = getJdbcTemplate().queryForObject(FIND_PAST_MAX_RSS, Long.class, job.getJobId(), name); - if (maxRss >= Dispatcher.MEM_RESERVED_MIN) { + if (maxRss >= MEM_RESERVED_MIN) { return maxRss; } else { - return Dispatcher.MEM_RESERVED_MIN; + return MEM_RESERVED_MIN; } } catch (EmptyResultDataAccessException e) { // Actually want to return 0 here, which means @@ -625,8 +635,8 @@ public void updateTags(JobInterface job, String tags, LayerType type) { @Override public void updateMinMemory(JobInterface job, long mem, LayerType type) { - if (mem < Dispatcher.MEM_RESERVED_MIN) { - mem = Dispatcher.MEM_RESERVED_MIN; + if (mem < MEM_RESERVED_MIN) { + mem = MEM_RESERVED_MIN; } getJdbcTemplate().update( "UPDATE layer SET int_mem_min=? WHERE pk_job=? 
AND str_type=?", diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java index ecf39caf7..cf54eb85d 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/ProcDaoJdbc.java @@ -29,6 +29,8 @@ import java.util.List; import java.util.Map; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.env.Environment; import org.springframework.dao.DataAccessException; import org.springframework.jdbc.core.PreparedStatementCreator; import org.springframework.jdbc.core.RowMapper; @@ -45,7 +47,6 @@ import com.imageworks.spcue.dao.ProcDao; import com.imageworks.spcue.dao.criteria.FrameSearchInterface; import com.imageworks.spcue.dao.criteria.ProcSearchInterface; -import com.imageworks.spcue.dispatcher.Dispatcher; import com.imageworks.spcue.dispatcher.ResourceDuplicationFailureException; import com.imageworks.spcue.dispatcher.ResourceReservationFailureException; import com.imageworks.spcue.grpc.host.HardwareState; @@ -53,6 +54,9 @@ public class ProcDaoJdbc extends JdbcDaoSupport implements ProcDao { + @Autowired + private Environment env; + private static final String VERIFY_RUNNING_PROC = "SELECT " + "proc.pk_frame " + @@ -121,15 +125,21 @@ public boolean deleteVirtualProc(VirtualProc proc) { public void insertVirtualProc(VirtualProc proc) { proc.id = SqlUtil.genKeyRandom(); + long memReservedMin = env.getRequiredProperty( + "dispatcher.memory.mem_reserved_min", + Long.class); + long memGpuReservedMin = env.getRequiredProperty( + "dispatcher.memory.mem_gpu_reserved_min", + Long.class); int result = 0; try { result = getJdbcTemplate().update(INSERT_VIRTUAL_PROC, proc.getProcId(), proc.getHostId(), proc.getShowId(), proc.getLayerId(), proc.getJobId(), proc.getFrameId(), proc.coresReserved, proc.memoryReserved, - proc.memoryReserved, 
Dispatcher.MEM_RESERVED_MIN, + proc.memoryReserved, memReservedMin, proc.gpusReserved, proc.gpuMemoryReserved, - proc.gpuMemoryReserved, Dispatcher.MEM_GPU_RESERVED_MIN, + proc.gpuMemoryReserved, memGpuReservedMin, proc.isLocalDispatch); // Update all of the resource counts @@ -634,7 +644,10 @@ public boolean balanceUnderUtilizedProcs(ProcInterface targetProc, long targetMe for (Map map: result) { String pk_proc = (String) map.get("pk_proc"); Long free_mem = (Long) map.get("free_mem"); - long available = free_mem - borrowMap.get(pk_proc) - Dispatcher.MEM_RESERVED_MIN; + long memReservedMin = env.getRequiredProperty( + "dispatcher.memory.mem_reserved_min", + Long.class); + long available = free_mem - borrowMap.get(pk_proc) - memReservedMin; if (available > memPerFrame) { borrowMap.put(pk_proc, borrowMap.get(pk_proc) + memPerFrame); memBorrowedTotal = memBorrowedTotal + memPerFrame; diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java index 226d9466c..c5ea11cb6 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java @@ -99,7 +99,10 @@ public class CoreUnitDispatcher implements Dispatcher { public boolean testMode = false; - @Autowired + private final long MEM_RESERVED_MIN; + private final long MEM_GPU_RESERVED_DEFAULT; + private final long MEM_GPU_RESERVED_MIN; + private Environment env; /* @@ -108,6 +111,14 @@ public class CoreUnitDispatcher implements Dispatcher { */ private Cache jobLock; + @Autowired + public CoreUnitDispatcher(Environment env) { + this.env = env; + MEM_RESERVED_MIN = getLongProperty("dispatcher.memory.mem_reserved_min"); + MEM_GPU_RESERVED_DEFAULT = getLongProperty("dispatcher.memory.mem_gpu_reserved_default"); + MEM_GPU_RESERVED_MIN = getLongProperty("dispatcher.memory.mem_gpu_reserved_min"); + } + /* * Return an 
integer value from the opencue.properties given a key */ @@ -115,6 +126,13 @@ private int getIntProperty(String property) { return env.getRequiredProperty(property, Integer.class); } + /* + * Return a long value from the opencue.properties given a key + */ + private long getLongProperty(String property) { + return env.getRequiredProperty(property, Long.class); + } + private Cache getOrCreateJobLock() { if (jobLock == null) { this.jobLock = CacheBuilder.newBuilder() @@ -134,10 +152,10 @@ private List dispatchJobs(DispatchHost host, Set jobs) { for (String jobid: jobs) { if (!host.hasAdditionalResources( - Dispatcher.CORE_POINTS_RESERVED_MIN, - Dispatcher.MEM_RESERVED_MIN, - Dispatcher.GPU_UNITS_RESERVED_MIN, - Dispatcher.MEM_GPU_RESERVED_MIN)) { + CORE_POINTS_RESERVED_MIN, + MEM_RESERVED_MIN, + GPU_UNITS_RESERVED_MIN, + MEM_GPU_RESERVED_MIN)) { return procs; } @@ -174,15 +192,13 @@ private List dispatchJobs(DispatchHost host, Set jobs) { private Set getGpuJobs(DispatchHost host, ShowInterface show) { Set jobs = null; - // TODO: GPU: make index with the 4 components instead of just 3, replace the just 3 - // If the host has gpu idle, first do a query to find gpu jobs // If no gpu jobs found remove resources to leave room for a gpu frame if (host.hasAdditionalResources( Dispatcher.CORE_POINTS_RESERVED_DEFAULT, - Dispatcher.MEM_RESERVED_MIN, + this.MEM_RESERVED_MIN, Dispatcher.GPU_UNITS_RESERVED_DEFAULT, - Dispatcher.MEM_GPU_RESERVED_DEFAULT)) { + this.MEM_GPU_RESERVED_DEFAULT)) { if (show == null) jobs = dispatchSupport.findDispatchJobs(host, getIntProperty("dispatcher.job_query_max")); @@ -312,9 +328,9 @@ public void wrapDispatchFrame() { host.useResources(proc.coresReserved, proc.memoryReserved, proc.gpusReserved, proc.gpuMemoryReserved); if (!host.hasAdditionalResources( Dispatcher.CORE_POINTS_RESERVED_MIN, - Dispatcher.MEM_RESERVED_MIN, + MEM_RESERVED_MIN, Dispatcher.GPU_UNITS_RESERVED_MIN, - Dispatcher.MEM_GPU_RESERVED_MIN)) { + MEM_GPU_RESERVED_MIN)) {
break; } else if (procs.size() >= getIntProperty("dispatcher.job_frame_dispatch_max")) { diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/Dispatcher.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/Dispatcher.java index 19a725bd3..3bb1ae105 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/Dispatcher.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/Dispatcher.java @@ -51,33 +51,15 @@ public interface Dispatcher { // on the host. public static final int CORE_LOAD_THRESHOLD = 5; - - // The default amount of memory reserved for a frame if no memory - // reservation settings are specified - public static final long MEM_RESERVED_DEFAULT = 3355443; - - // The maximum amount of memory that can be requested for a given frame. - public static final long MEM_RESERVED_MAX = CueUtil.GB * 50; - - // The minimum amount of memory that can be assigned to a frame. - public static final long MEM_RESERVED_MIN = 262144; - - // Memory reserved by system, gets chopped off the available memory - public static final long MEM_RESERVED_SYSTEM = 524288; - // Amount of memory that has to be idle for the rest of the cores // on the machine to be considered stranded. public static final long MEM_STRANDED_THRESHHOLD = CueUtil.GB + CueUtil.MB512; - // The default amount of gpu memory reserved for a frame if no gpu memory - // reservation settings are specified - public static final long MEM_GPU_RESERVED_DEFAULT = 0; - - // The minimum amount of gpu memory that can be assigned to a frame. - public static final long MEM_GPU_RESERVED_MIN = 0; + // Determines the service default minimum memory per frame. + public static final long MEM_SERVICE_RESERVED_DEFAULT = CueUtil.GB4; - // The maximum amount of gpu memory that can be assigned to a frame. - public static final long MEM_GPU_RESERVED_MAX = CueUtil.GB * 1024; + // Determines the service default minimum gpu per frame. 
+ public static final long MEM_SERVICE_GPU_RESERVED_DEFAULT = 0; // Return value for cleared frame public static final int EXIT_STATUS_FRAME_CLEARED = 299; diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java index b0a7ccd9c..6c02fd184 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java @@ -254,7 +254,10 @@ public void handleHostReport(HostReport report, boolean isBoot) { bookingManager.removeInactiveLocalHostAssignment(lca); } } - + long memReservedMin = env.getRequiredProperty( + "dispatcher.memory.mem_reserved_min", + Long.class); + if (!isTempDirStorageEnough(report.getHost().getTotalMcp(), report.getHost().getFreeMcp(), host.os)) { msg = String.format( "%s doesn't have enough free space in the temporary directory (mcp), %dMB", @@ -264,13 +267,13 @@ else if (coresToReserve <= 0 || host.idleCores < Dispatcher.CORE_POINTS_RESERVED msg = String.format("%s doesn't have enough idle cores, %d needs %d", host.name, host.idleCores, Dispatcher.CORE_POINTS_RESERVED_MIN); } - else if (host.idleMemory < Dispatcher.MEM_RESERVED_MIN) { + else if (host.idleMemory < memReservedMin) { msg = String.format("%s doesn't have enough idle memory, %d needs %d", - host.name, host.idleMemory, Dispatcher.MEM_RESERVED_MIN); + host.name, host.idleMemory, memReservedMin); } else if (report.getHost().getFreeMem() < CueUtil.MB512) { msg = String.format("%s doesn't have enough free system mem, %d needs %d", - host.name, report.getHost().getFreeMem(), Dispatcher.MEM_RESERVED_MIN); + host.name, report.getHost().getFreeMem(), memReservedMin); } else if(!host.hardwareState.equals(HardwareState.UP)) { msg = host + " is not in the Up state."; diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/LocalDispatcher.java 
b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/LocalDispatcher.java index 288965a04..ffd205d32 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/LocalDispatcher.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/LocalDispatcher.java @@ -24,6 +24,8 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.LogManager; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.env.Environment; import org.springframework.dao.EmptyResultDataAccessException; import com.imageworks.spcue.DispatchFrame; @@ -42,6 +44,9 @@ public class LocalDispatcher extends AbstractDispatcher implements Dispatcher { + @Autowired + private Environment env; + private static final Logger logger = LogManager.getLogger(LocalDispatcher.class); @@ -139,15 +144,21 @@ private List dispatchHost(DispatchHost host, JobInterface job, procs.add(proc); + long memReservedMin = env.getRequiredProperty( + "dispatcher.memory.mem_reserved_min", + Long.class); + long memGpuReservedMin = env.getRequiredProperty( + "dispatcher.memory.mem_gpu_reserved_min", + Long.class); /* * This should stay here and not go into VirtualProc * or else the count will be off if you fail to book. */ lha.useResources(proc.coresReserved, proc.memoryReserved, proc.gpusReserved, proc.gpuMemoryReserved); if (!lha.hasAdditionalResources(lha.getThreads() * 100, - Dispatcher.MEM_RESERVED_MIN, + memReservedMin, Dispatcher.GPU_UNITS_RESERVED_MIN, - Dispatcher.MEM_GPU_RESERVED_MIN)) { + memGpuReservedMin)) { break; } @@ -226,15 +237,22 @@ private List dispatchHost(DispatchHost host, LayerInterface layer, procs.add(proc); + long memReservedMin = env.getRequiredProperty( + "dispatcher.memory.mem_reserved_min", + Long.class); + long memGpuReservedMin = env.getRequiredProperty( + "dispatcher.memory.mem_gpu_reserved_min", + Long.class); + /* * This should stay here and not go into VirtualProc * or else the count will be off if you fail to book. 
*/ lha.useResources(proc.coresReserved, proc.memoryReserved, proc.gpusReserved, proc.gpuMemoryReserved); if (!lha.hasAdditionalResources(100, - Dispatcher.MEM_RESERVED_MIN, + memReservedMin, Dispatcher.GPU_UNITS_RESERVED_MIN, - Dispatcher.MEM_GPU_RESERVED_MIN)) { + memGpuReservedMin)) { break; } diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/commands/DispatchBookHost.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/commands/DispatchBookHost.java index 64329fc0f..edb5c2b62 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/commands/DispatchBookHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/commands/DispatchBookHost.java @@ -19,17 +19,14 @@ package com.imageworks.spcue.dispatcher.commands; -import java.util.List; -import java.util.ArrayList; -import org.apache.logging.log4j.Logger; -import org.apache.logging.log4j.LogManager; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.env.Environment; import com.imageworks.spcue.DispatchHost; import com.imageworks.spcue.GroupInterface; import com.imageworks.spcue.JobInterface; import com.imageworks.spcue.ShowInterface; import com.imageworks.spcue.dispatcher.Dispatcher; -import com.imageworks.spcue.VirtualProc; /** * A command for booking a host. 
@@ -37,9 +34,9 @@ * @category command */ public class DispatchBookHost extends KeyRunnable { - private static final Logger logger = - LogManager.getLogger(DispatchBookHost.class); + @Autowired + private Environment env; private ShowInterface show = null; private GroupInterface group = null; private JobInterface job = null; @@ -90,21 +87,27 @@ else if (group != null) { else if (job != null) { dispatcher.dispatchHost(host, job); } + long memReservedMin = env.getRequiredProperty( + "dispatcher.memory.mem_reserved_min", + Long.class); + long memGpuReservedMin = env.getRequiredProperty( + "dispatcher.memory.mem_gpu_reserved_min", + Long.class); // Try to book any remaining resources if (host.hasAdditionalResources( Dispatcher.CORE_POINTS_RESERVED_MIN, - Dispatcher.MEM_RESERVED_MIN, + memReservedMin, Dispatcher.GPU_UNITS_RESERVED_MIN, - Dispatcher.MEM_GPU_RESERVED_MIN)) { + memGpuReservedMin)) { dispatcher.dispatchHost(host); } if (host.hasAdditionalResources( Dispatcher.CORE_POINTS_RESERVED_MIN, - Dispatcher.MEM_RESERVED_MIN, + memReservedMin, Dispatcher.GPU_UNITS_RESERVED_MIN, - Dispatcher.MEM_GPU_RESERVED_MIN)) { + memGpuReservedMin)) { dispatcher.dispatchHostToAllShows(host); } } diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index 2e2fa0801..3ec17e62d 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -38,6 +38,8 @@ import org.jdom.Document; import org.jdom.Element; import org.jdom.input.SAXBuilder; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.env.Environment; import org.springframework.dao.EmptyResultDataAccessException; import org.xml.sax.EntityResolver; import org.xml.sax.InputSource; @@ -58,6 +60,9 @@ import com.imageworks.spcue.util.CueUtil; public class JobSpec { + @Autowired + private Environment env; + private static 
final Logger logger = LogManager.getLogger(JobSpec.class); private String facility; @@ -513,19 +518,25 @@ private void determineMinimumMemory(BuildableJob buildableJob, String memory = layerTag.getChildTextTrim("memory").toLowerCase(); minMemory = convertMemoryInput(memory); + long memReservedMin = env.getRequiredProperty( + "dispatcher.memory.mem_reserved_min", + Long.class); + long memReservedMax = env.getRequiredProperty( + "dispatcher.memory.mem_reserved_max", + Long.class); // Some quick sanity checks to make sure memory hasn't gone // over or under reasonable defaults. - if (minMemory > Dispatcher.MEM_RESERVED_MAX) { + if (minMemory > memReservedMax) { logger.warn("Setting memory for " + buildableJob.detail.name + - "/" + layer.name + " to: "+ Dispatcher.MEM_RESERVED_MAX); - layer.minimumMemory = Dispatcher.MEM_RESERVED_MAX; + "/" + layer.name + " to: "+ memReservedMax); + layer.minimumMemory = memReservedMax; } - else if (minMemory < Dispatcher.MEM_RESERVED_MIN) { + else if (minMemory < memReservedMin) { logger.warn(buildableJob.detail.name + "/" + layer.name + "Specified too little memory, defaulting to: " + - Dispatcher.MEM_RESERVED_MIN); - minMemory = Dispatcher.MEM_RESERVED_MIN; + memReservedMin); + minMemory = memReservedMin; } buildableLayer.isMemoryOverride = true; @@ -560,18 +571,24 @@ private void determineMinimumGpuMemory(BuildableJob buildableJob, Element layerT long minGpuMemory; try { minGpuMemory = convertMemoryInput(memory); + long memGpuReservedMin = env.getRequiredProperty( + "dispatcher.memory.mem_gpu_reserved_min", + Long.class); + long memGpuReservedMax = env.getRequiredProperty( + "dispatcher.memory.mem_gpu_reserved_max", + Long.class); // Some quick sanity checks to make sure gpu memory hasn't gone // over or under reasonable defaults. - if (minGpuMemory > Dispatcher.MEM_GPU_RESERVED_MAX) { + if (minGpuMemory > memGpuReservedMax) { throw new SpecBuilderException("Gpu memory requirements exceed " + "maximum. 
Are you specifying the correct units?"); } - else if (minGpuMemory < Dispatcher.MEM_GPU_RESERVED_MIN) { + else if (minGpuMemory < memGpuReservedMin) { logger.warn(buildableJob.detail.name + "/" + layer.name + "Specified too little gpu memory, defaulting to: " + - Dispatcher.MEM_GPU_RESERVED_MIN); - minGpuMemory = Dispatcher.MEM_GPU_RESERVED_MIN; + memGpuReservedMin); + minGpuMemory = memGpuReservedMin; } layer.minimumGpuMemory = minGpuMemory; @@ -580,7 +597,9 @@ else if (minGpuMemory < Dispatcher.MEM_GPU_RESERVED_MIN) { logger.info("Error setting gpu memory for " + buildableJob.detail.name + "/" + layer.name + " failed, reason: " + e + ". Using default."); - layer.minimumGpuMemory = Dispatcher.MEM_GPU_RESERVED_DEFAULT; + layer.minimumGpuMemory = env.getRequiredProperty( + "dispatcher.memory.mem_gpu_reserved_min", + Long.class); } } diff --git a/cuebot/src/main/java/com/imageworks/spcue/util/CueUtil.java b/cuebot/src/main/java/com/imageworks/spcue/util/CueUtil.java index 67d4d17e1..20e91147f 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/util/CueUtil.java +++ b/cuebot/src/main/java/com/imageworks/spcue/util/CueUtil.java @@ -230,12 +230,18 @@ public static final String KbToMb(long kb) { return String.format("%dMB", kb / 1024); } - public static final long convertKbToFakeKb64bit(long Kb) { - return (long) (Math.ceil((Kb * 0.0009765625) * 0.0009765625) * 1048576) - Dispatcher.MEM_RESERVED_SYSTEM; + public static final long convertKbToFakeKb64bit(Environment env, long Kb) { + long memReservedSystem = env.getRequiredProperty( + "dispatcher.memory.mem_reserved_system", + Long.class); + return (long) (Math.ceil((Kb * 0.0009765625) * 0.0009765625) * 1048576) - memReservedSystem; } - public static final long convertKbToFakeKb32bit(long Kb) { - return (long) (Math.floor((Kb * 0.0009765625) * 0.0009765625) * 1048576) - Dispatcher.MEM_RESERVED_SYSTEM; + public static final long convertKbToFakeKb32bit(Environment env, long Kb) { + long memReservedSystem = 
env.getRequiredProperty( + "dispatcher.memory.mem_reserved_system", + Long.class); + return (long) (Math.floor((Kb * 0.0009765625) * 0.0009765625) * 1048576) - memReservedSystem; } /** diff --git a/cuebot/src/main/resources/opencue.properties b/cuebot/src/main/resources/opencue.properties index b40fbc9c6..0340da60e 100644 --- a/cuebot/src/main/resources/opencue.properties +++ b/cuebot/src/main/resources/opencue.properties @@ -148,6 +148,34 @@ dispatcher.oom_frame_overboard_allowed_threshold=-1.0 # the frame as stuck dispatcher.frame_kill_retry_limit=3 +# The default amount of memory reserved for a frame if no memory +# reservation settings are specified. +# Default = 3.2GB +dispatcher.memory.mem_reserved_default = 3355443 + +# The maximum amount of memory that can be requested for a given frame. +# Default = 50GB +dispatcher.memory.mem_reserved_max = 52428800 + +# The minimum amount of memory that can be assigned to a frame. +# Default = 256MB +dispatcher.memory.mem_reserved_min = 262144 + +# Memory reserved by system, gets chopped off the available memory +# Default = 512MB +dispatcher.memory.mem_reserved_system = 524288 + +# The default amount of gpu memory reserved for a frame if no gpu memory +# reservation settings are specified +dispatcher.memory.mem_gpu_reserved_default = 0 + +# The minimum amount of gpu memory that can be assigned to a frame. +dispatcher.memory.mem_gpu_reserved_min = 0 + +# The maximum amount of gpu memory that can be assigned to a frame.
+# Default = 100GB +dispatcher.memory.mem_gpu_reserved_max = 104857600 + # Whether to satisfy dependents (*_ON_FRAME and *_ON_LAYER) only on Frame success depend.satisfy_only_on_frame_success=true diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java index 918b43679..09b3d3ccd 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java @@ -43,7 +43,6 @@ import com.imageworks.spcue.dao.AllocationDao; import com.imageworks.spcue.dao.FacilityDao; import com.imageworks.spcue.dao.HostDao; -import com.imageworks.spcue.dispatcher.Dispatcher; import com.imageworks.spcue.grpc.host.HardwareState; import com.imageworks.spcue.grpc.host.HostTagType; import com.imageworks.spcue.grpc.host.LockState; @@ -82,6 +81,10 @@ public class HostDaoTests extends AbstractTransactionalJUnit4SpringContextTests public HostDaoTests() { } + // Hardcoded value of dispatcher.memory.mem_reserved_system + // to avoid having to read opencue.properties on a test setting + private final long MEM_RESERVED_SYSTEM = 524288; + public static RenderHost buildRenderHost(String name) { RenderHost host = RenderHost.newBuilder() .setName(name) @@ -131,7 +134,7 @@ public void testInsertHost() { hostManager.getDefaultAllocationDetail(), false); - assertEquals(Long.valueOf(CueUtil.GB16 - Dispatcher.MEM_RESERVED_SYSTEM), jdbcTemplate.queryForObject( + assertEquals(Long.valueOf(CueUtil.GB16 - this.MEM_RESERVED_SYSTEM), jdbcTemplate.queryForObject( "SELECT int_mem FROM host WHERE str_name=?", Long.class, TEST_HOST)); } @@ -272,7 +275,7 @@ public void testInsertHostDesktop() { hostManager.getDefaultAllocationDetail(), false); - assertEquals(Long.valueOf(CueUtil.GB16 - Dispatcher.MEM_RESERVED_SYSTEM), jdbcTemplate.queryForObject( + assertEquals(Long.valueOf(CueUtil.GB16 - 
this.MEM_RESERVED_SYSTEM), jdbcTemplate.queryForObject( "SELECT int_mem FROM host WHERE str_name=?", Long.class, TEST_HOST)); } @@ -482,9 +485,9 @@ public void updateHostResources() { // Verify what the original values are assertEquals(800, dispatchHost.cores); assertEquals(800, dispatchHost.idleCores); - assertEquals(CueUtil.GB16 - Dispatcher.MEM_RESERVED_SYSTEM, + assertEquals(CueUtil.GB16 - this.MEM_RESERVED_SYSTEM, dispatchHost.idleMemory); - assertEquals(CueUtil.GB16- Dispatcher.MEM_RESERVED_SYSTEM, + assertEquals(CueUtil.GB16- this.MEM_RESERVED_SYSTEM, dispatchHost.memory); dispatchHost = hostDao.findDispatchHost(TEST_HOST); @@ -492,9 +495,9 @@ public void updateHostResources() { // Now verify they've changed. assertEquals(2400, dispatchHost.cores); assertEquals(2400, dispatchHost.idleCores); - assertEquals(CueUtil.GB32 - Dispatcher.MEM_RESERVED_SYSTEM, + assertEquals(CueUtil.GB32 - this.MEM_RESERVED_SYSTEM, dispatchHost.idleMemory); - assertEquals(CueUtil.GB32- Dispatcher.MEM_RESERVED_SYSTEM, + assertEquals(CueUtil.GB32- this.MEM_RESERVED_SYSTEM, dispatchHost.memory); } diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/LayerDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/LayerDaoTests.java index 189178689..6354595ec 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/LayerDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/LayerDaoTests.java @@ -281,7 +281,9 @@ public void testUpdateLayerMinMemory() { */ layerDao.updateLayerMinMemory(layer, 8096); LayerDetail l2 = layerDao.findLayerDetail(getJob(), "pass_1"); - assertEquals(l2.minimumMemory, Dispatcher.MEM_RESERVED_MIN); + // Hardcoded value of dispatcher.memory.mem_reserved_min + // to avoid having to read opencue.properties on a test setting + assertEquals(l2.minimumMemory, 262144); /* * Check regular operation. 
diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ProcDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ProcDaoTests.java index 0cbe09970..6620116c8 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ProcDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/ProcDaoTests.java @@ -28,6 +28,7 @@ import org.junit.Rule; import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.env.Environment; import org.springframework.test.annotation.Rollback; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.AbstractTransactionalJUnit4SpringContextTests; @@ -75,6 +76,9 @@ public class ProcDaoTests extends AbstractTransactionalJUnit4SpringContextTests @Rule public AssumingPostgresEngine assumingPostgresEngine; + @Autowired + private Environment env; + @Resource ProcDao procDao; @@ -113,6 +117,9 @@ public class ProcDaoTests extends AbstractTransactionalJUnit4SpringContextTests private static String PK_ALLOC = "00000000-0000-0000-0000-000000000000"; + private long MEM_RESERVED_DEFAULT; + private long MEM_GPU_RESERVED_DEFAULT; + public DispatchHost createHost() { RenderHost host = RenderHost.newBuilder() @@ -149,6 +156,12 @@ public JobDetail launchJob() { public void setDispatcherTestMode() { dispatcher.setTestMode(true); jobLauncher.testMode = true; + this.MEM_RESERVED_DEFAULT = env.getRequiredProperty( + "dispatcher.memory.mem_reserved_default", + Long.class); + this.MEM_GPU_RESERVED_DEFAULT = env.getRequiredProperty( + "dispatcher.memory.mem_gpu_reserved_default", + Long.class); } @Test @@ -587,10 +600,10 @@ public void testGetReservedMemory() { procDao.insertVirtualProc(proc); VirtualProc _proc = procDao.findVirtualProc(frame); - assertEquals(Long.valueOf(Dispatcher.MEM_RESERVED_DEFAULT), jdbcTemplate.queryForObject( + 
assertEquals(Long.valueOf(this.MEM_RESERVED_DEFAULT), jdbcTemplate.queryForObject( "SELECT int_mem_reserved FROM proc WHERE pk_proc=?", Long.class, _proc.id)); - assertEquals(Dispatcher.MEM_RESERVED_DEFAULT, + assertEquals(this.MEM_RESERVED_DEFAULT, procDao.getReservedMemory(_proc)); } @@ -609,10 +622,10 @@ public void testGetReservedGpuMemory() { procDao.insertVirtualProc(proc); VirtualProc _proc = procDao.findVirtualProc(frame); - assertEquals(Long.valueOf(Dispatcher.MEM_GPU_RESERVED_DEFAULT), jdbcTemplate.queryForObject( + assertEquals(Long.valueOf(this.MEM_GPU_RESERVED_DEFAULT), jdbcTemplate.queryForObject( "SELECT int_gpu_mem_reserved FROM proc WHERE pk_proc=?", Long.class, _proc.id)); - assertEquals(Dispatcher.MEM_GPU_RESERVED_DEFAULT, + assertEquals(this.MEM_GPU_RESERVED_DEFAULT, procDao.getReservedGpuMemory(_proc)); } @@ -655,18 +668,18 @@ public void testBalanceUnderUtilizedProcs() { layerDao.updateLayerMaxRSS(frame3,300000, true); procDao.balanceUnderUtilizedProcs(proc3, 100000); - procDao.increaseReservedMemory(proc3, Dispatcher.MEM_RESERVED_DEFAULT + 100000); + procDao.increaseReservedMemory(proc3, this.MEM_RESERVED_DEFAULT + 100000); // Check the target proc VirtualProc targetProc = procDao.getVirtualProc(proc3.getId()); - assertEquals( Dispatcher.MEM_RESERVED_DEFAULT+ 100000, targetProc.memoryReserved); + assertEquals( this.MEM_RESERVED_DEFAULT + 100000, targetProc.memoryReserved); // Check other procs VirtualProc firstProc = procDao.getVirtualProc(proc1.getId()); - assertEquals( Dispatcher.MEM_RESERVED_DEFAULT - 50000 -1 , firstProc.memoryReserved); + assertEquals( this.MEM_RESERVED_DEFAULT - 50000 -1 , firstProc.memoryReserved); VirtualProc secondProc = procDao.getVirtualProc(proc2.getId()); - assertEquals(Dispatcher.MEM_RESERVED_DEFAULT - 50000 -1, secondProc.memoryReserved); + assertEquals(this.MEM_RESERVED_DEFAULT - 50000 -1, secondProc.memoryReserved); } diff --git 
a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/WhiteboardDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/WhiteboardDaoTests.java index 2d34868f8..b8368e1f8 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/WhiteboardDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/WhiteboardDaoTests.java @@ -268,17 +268,19 @@ public ActionEntity createAction(FilterEntity f) { } public RenderHost getRenderHost() { - + // Hardcoded value of dispatcher.memory.mem_reserved_min + // to avoid having to read opencue.properties on a test setting + long memReservedMin = 262144; RenderHost host = RenderHost.newBuilder() .setName(HOST) .setBootTime(1192369572) // The minimum amount of free space in the temporary directory to book a host. .setFreeMcp(CueUtil.GB) - .setFreeMem((int) Dispatcher.MEM_RESERVED_MIN * 4) + .setFreeMem((int) memReservedMin * 4) .setFreeSwap(2076) .setLoad(1) .setTotalMcp(CueUtil.GB4) - .setTotalMem((int) Dispatcher.MEM_RESERVED_MIN * 4) + .setTotalMem((int) memReservedMin * 4) .setTotalSwap(2096) .setNimbyEnabled(true) .setNumProcs(2) @@ -1312,7 +1314,7 @@ public FrameStateDisplayOverride createFrameStateDisplayOverride(String frameId) @Transactional @Rollback(true) public void testFramesWithDisplayOverride() { - // since current_timestamp does not update, we need to make sure the + // since current_timestamp does not update, we need to make sure the // timestamp we use when retrieving updated frames is older than when // the frame's ts_updated value is set to during insertion. 
long timestamp = System.currentTimeMillis(); @@ -1324,7 +1326,7 @@ public void testFramesWithDisplayOverride() { FrameStateDisplayOverride override = createFrameStateDisplayOverride(frame.getFrameId()); FrameStateDisplayOverrideSeq results = frameDao.getFrameStateDisplayOverrides(frame.getFrameId()); assertEquals(1, results.getOverridesCount()); - + frameDao.updateFrameState(frame, FrameState.SUCCEEDED); // Test GET_FRAME @@ -1337,7 +1339,7 @@ public void testFramesWithDisplayOverride() { new ArrayList(), (int) (timestamp / 1000)); UpdatedFrameSeq uFrames = rs.getUpdatedFrames(); // We'll end up getting all the frames for the job so we need to find - // the one we want. + // the one we want. for (UpdatedFrame uFrame: uFrames.getUpdatedFramesList()) { if (uFrame.getId().equals(frame.getFrameId())) { assertTrue(uFrame.hasFrameStateDisplayOverride()); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/service/JobManagerTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/service/JobManagerTests.java index 2ea9b5dde..b7a4dfbf8 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/service/JobManagerTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/service/JobManagerTests.java @@ -50,7 +50,6 @@ import com.imageworks.spcue.dao.LayerDao; import com.imageworks.spcue.dao.criteria.FrameSearchFactory; import com.imageworks.spcue.dao.criteria.FrameSearchInterface; -import com.imageworks.spcue.dispatcher.Dispatcher; import com.imageworks.spcue.grpc.host.HardwareState; import com.imageworks.spcue.grpc.job.FrameSearchCriteria; import com.imageworks.spcue.grpc.job.FrameState; @@ -454,7 +453,11 @@ public void optimizeLayer() { JobInterface job = getJob3(); LayerDetail layer = layerDao.findLayerDetail(job, "pass_1"); - assertEquals(Dispatcher.MEM_RESERVED_DEFAULT, layer.minimumMemory); + // Hardcoded value of dispatcher.memory.mem_reserved_default + // to avoid having to read opencue.properties on a test setting + long memReservedDefault = 3355443; 
+ + assertEquals(memReservedDefault, layer.minimumMemory); assertThat(layer.tags, contains("general")); /* diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/util/CoreSpanTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/util/CoreSpanTests.java index b72d2b5f6..5543cc698 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/util/CoreSpanTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/util/CoreSpanTests.java @@ -25,7 +25,6 @@ import com.imageworks.spcue.DispatchFrame; import com.imageworks.spcue.DispatchHost; import com.imageworks.spcue.VirtualProc; -import com.imageworks.spcue.dispatcher.Dispatcher; import com.imageworks.spcue.grpc.host.ThreadMode; import com.imageworks.spcue.util.CueUtil; @@ -97,10 +96,13 @@ public void testCoreSpanTest3() { host.idleMemory = CueUtil.GB8; host.cores = 800; host.idleCores = 780; + // Hardcoded value of dispatcher.memory.mem_reserved_default + // to avoid having to read opencue.properties on a test setting + long memReservedDefault = 3355443; DispatchFrame frame = new DispatchFrame(); frame.minCores = 100; - frame.minMemory = Dispatcher.MEM_RESERVED_DEFAULT; + frame.minMemory = memReservedDefault; frame.threadable = true; VirtualProc proc = VirtualProc.build(host, frame); @@ -133,10 +135,13 @@ public void testBuildVirtualProc() { host.idleMemory = CueUtil.GB8; host.cores = 800; host.idleCores = 800; + // Hardcoded value of dispatcher.memory.mem_reserved_default + // to avoid having to read opencue.properties on a test setting + long memReservedDefault = 3355443; DispatchFrame frame = new DispatchFrame(); frame.minCores = 100; - frame.minMemory = Dispatcher.MEM_RESERVED_DEFAULT; + frame.minMemory = memReservedDefault; frame.threadable = true; proc = VirtualProc.build(host, frame); diff --git a/cuebot/src/test/resources/opencue.properties b/cuebot/src/test/resources/opencue.properties index cfaec991c..5ec6fba06 100644 --- a/cuebot/src/test/resources/opencue.properties +++ 
b/cuebot/src/test/resources/opencue.properties @@ -85,4 +85,12 @@ dispatcher.frame_kill_retry_limit=3 # A comma separated list of services that should have their frames considered # selfish. A selfish frame will reserve all the available cores to avoid # having to share resources with other renders. -dispatcher.frame.selfish.services=arnold,selfish-service \ No newline at end of file +dispatcher.frame.selfish.services=arnold,selfish-service + +dispatcher.memory.mem_reserved_default = 3355443 +dispatcher.memory.mem_reserved_max = 52428800 +dispatcher.memory.mem_reserved_min = 262144 +dispatcher.memory.mem_reserved_system = 524288 +dispatcher.memory.mem_gpu_reserved_default = 0 +dispatcher.memory.mem_gpu_reserved_min = 0 +dispatcher.memory.mem_gpu_reserved_max = 104857600 \ No newline at end of file