Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ai.triton.server): expose "timeout" parameter for long running operations #4017

Merged
merged 24 commits into from
Jun 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9a51a3c
feat(TritonServerServiceOptions): add timeout parameters computation
mattdibi Jun 10, 2022
4655c92
feat(metatype): expose "timeout" parameter
mattdibi Jun 10, 2022
92da3f6
feat(TritonServerServiceImpl): use newly added timeout variables
mattdibi Jun 10, 2022
baa5588
refactor(TritonServerServiceImpl): more compact checks for readability
mattdibi Jun 10, 2022
6bf462d
feat(TritonServerServiceOptions): improve input checking
mattdibi Jun 10, 2022
6bfd613
revert: "feat(TritonServerServiceOptions): improve input checking"
mattdibi Jun 10, 2022
ef8914d
feat(metatype): add minimum value for timeout
mattdibi Jun 10, 2022
7dee548
feat(metatype): add sensible max timeout value
mattdibi Jun 10, 2022
1678d85
refactor(TritonServerServiceOptions): remove unnecessary check
mattdibi Jun 10, 2022
731a862
feat(metatype): remove timeout==0 -> wait indifinitely
mattdibi Jun 10, 2022
db86717
feat(TritonServerLocalManager): add kill() method
mattdibi Jun 10, 2022
b45b3e9
fix(TritonServerServiceImpl): removed unnecessary check
mattdibi Jun 10, 2022
adbbcd2
test: add starting TritonServerServiceOptionsTest
mattdibi Jun 10, 2022
a2c0c5f
refactor(TritonServerServiceOptionsTest): adopt Gerkhin style
mattdibi Jun 10, 2022
d1f0eeb
refactor(TritonServerServiceOptions): updated equals and hashCode met…
mattdibi Jun 10, 2022
b0c0509
refactor(TritonServerServiceOptionsTest): rename method when->given
mattdibi Jun 10, 2022
6d53f96
test: add equals method tests
mattdibi Jun 10, 2022
5976372
test(TritonServerServiceOptionsTest): add hashCode method tests
mattdibi Jun 10, 2022
0b31e2f
refactor(TritonServerServiceOptionsTest): improved test naming conven…
mattdibi Jun 10, 2022
2d44b23
test(TritonServerLocalManagerTest): add kill method tests
mattdibi Jun 13, 2022
ab19de1
test(TritonServerLocalManagerTest): add stop method test
mattdibi Jun 13, 2022
c7781e6
refactor: apply Gherkhin style
mattdibi Jun 13, 2022
5a93f98
fix(metatype): add missing comma
mattdibi Jun 14, 2022
3e3ae3c
fix(test): add missing copyright headers
mattdibi Jun 14, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,17 @@
description="Only for local instance, a semi-colon separated list of configuration for the backends. i.e. tensorflow,version=2;tensorflow,allow-soft-placement=false">
</AD>

<AD id="timeout"
name="Timeout (in seconds) for time consuming tasks"
type="Integer"
cardinality="0"
required="false"
min="1"
max="3600"
default="3"
description="Timeout (in seconds) for time consuming tasks like server startup, shutdown or model load. If the task exceeds the timeout, the operation will be terminated with an error.">
</AD>

</OCD>
<Designate factoryPid="org.eclipse.kura.ai.triton.server.TritonServerService">
<Object ocdref="org.eclipse.kura.ai.triton.server.TritonServerService"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ protected void stop() {
stopScheduledExecutor();
}

protected void kill() {
killLocalServerMonitor();
stopScheduledExecutor();
}

private void startLocalServerMonitor() {
this.serverCommand = createServerCommand();
this.scheduledFuture = this.scheduledExecutorService.scheduleAtFixedRate(() -> {
Expand Down Expand Up @@ -96,6 +101,11 @@ private void stopLocalServerMonitor() {
stopLocalServer();
}

private void killLocalServerMonitor() {
stopMonitor();
killLocalServer();
}

private void stopMonitor() {
if (nonNull(this.scheduledFuture)) {
this.scheduledFuture.cancel(true);
Expand All @@ -109,6 +119,10 @@ private synchronized void stopLocalServer() {
TritonServerLocalManager.this.commandExecutorService.kill(TRITONSERVER, LinuxSignal.SIGINT);
}

private synchronized void killLocalServer() {
TritonServerLocalManager.this.commandExecutorService.kill(TRITONSERVER, LinuxSignal.SIGKILL);
}

private void stopScheduledExecutor() {
if (this.scheduledExecutorService != null) {
this.scheduledExecutorService.shutdown();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,6 @@ public class TritonServerServiceImpl implements InferenceEngineService, Configur

private static final Logger logger = LoggerFactory.getLogger(TritonServerServiceImpl.class);
private static final String TEMP_DIRECTORY_PREFIX = "decrypted_models";
private static final int N_ATTEMPT = 6;
private static final int WAIT_ATTEMPT_MS = 500;

private CommandExecutorService commandExecutorService;
private CryptoService cryptoService;
Expand All @@ -104,6 +102,7 @@ protected void activate(Map<String, Object> properties) {
public void updated(Map<String, Object> properties) {
logger.info("Update TritonServerService...");
TritonServerServiceOptions newOptions = new TritonServerServiceOptions(properties);

if (newOptions.equals(this.options)) {
return;
}
Expand Down Expand Up @@ -151,11 +150,11 @@ private void stopLocalInstance() {

int counter = 0;
while (this.tritonServerLocalManager.isLocalServerRunning()) {
if (counter++ >= N_ATTEMPT) {
logger.warn("Cannot stop local server instance.");
return;
if (counter++ >= this.options.getNRetries()) {
logger.warn("Cannot stop local server instance. Killing it.");
this.tritonServerLocalManager.kill();
}
TritonServerLocalManager.sleepFor(WAIT_ATTEMPT_MS);
TritonServerLocalManager.sleepFor(this.options.getRetryInterval());
}

if (this.options.modelsAreEncrypted()) {
Expand Down Expand Up @@ -196,11 +195,11 @@ protected void loadModels() {

int counter = 0;
while (!isEngineReady()) {
if (counter++ >= N_ATTEMPT) {
if (counter++ >= this.options.getNRetries()) {
logger.warn("Cannot load models since server is not ready.");
return;
}
TritonServerLocalManager.sleepFor(WAIT_ATTEMPT_MS);
TritonServerLocalManager.sleepFor(this.options.getRetryInterval());
}

this.options.getModels().forEach(modelName -> {
Expand Down Expand Up @@ -245,11 +244,11 @@ public void loadModel(String modelName, Optional<String> modelPath) throws KuraE
if (this.options.modelsAreEncrypted()) {
int counter = 0;
while (!isModelLoaded(modelName)) {
if (counter++ >= N_ATTEMPT) {
if (counter++ >= this.options.getNRetries()) {
logger.warn("Cannot check if model was correctly loaded. Wiping decrypted model anyway");
break;
}
TritonServerLocalManager.sleepFor(WAIT_ATTEMPT_MS);
TritonServerLocalManager.sleepFor(this.options.getRetryInterval());
}
TritonServerEncryptionUtils.cleanRepository(this.decryptionFolderPath);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,17 @@ public class TritonServerServiceOptions {
private static final String PROPERTY_LOCAL_BACKENDS_CONFIG = "local.backends.config";
private static final String PROPERTY_MODELS = "models";
private static final String PROPERTY_LOCAL = "enable.local";
private static final String PROPERTY_TIMEOUT = "timeout";
private final Map<String, Object> properties;

private static final int RETRY_INTERVAL = 500; // ms

private final int httpPort;
private final int grpcPort;
private final int metricsPort;
private final boolean isLocal;
private final int timeout;
private final int nRetries;

public TritonServerServiceOptions(final Map<String, Object> properties) {
requireNonNull(properties, "Properties cannot be null");
Expand Down Expand Up @@ -73,6 +78,14 @@ public TritonServerServiceOptions(final Map<String, Object> properties) {
} else {
this.isLocal = false;
}

final Object propertyTimeout = this.properties.get(PROPERTY_TIMEOUT);
if (propertyTimeout instanceof Integer) {
this.timeout = (Integer) propertyTimeout;
} else {
this.timeout = 3;
}
this.nRetries = (this.timeout * 1000) / RETRY_INTERVAL;
}

public String getAddress() {
Expand Down Expand Up @@ -138,6 +151,18 @@ public List<String> getModels() {
return models;
}

public int getNRetries() {
return this.nRetries;
}

public int getTimeout() {
return this.timeout;
}

public int getRetryInterval() {
return RETRY_INTERVAL;
}

private String getStringProperty(String propertyName) {
String stringProperty = "";
final Object stringPropertyObj = this.properties.get(propertyName);
Expand All @@ -149,7 +174,8 @@ private String getStringProperty(String propertyName) {

@Override
public int hashCode() {
return Objects.hash(this.grpcPort, this.httpPort, this.isLocal, this.metricsPort, this.properties);
return Objects.hash(this.grpcPort, this.httpPort, this.isLocal, this.metricsPort, this.timeout, this.nRetries,
this.properties);
}

@Override
Expand All @@ -162,7 +188,8 @@ public boolean equals(Object obj) {
}
TritonServerServiceOptions other = (TritonServerServiceOptions) obj;
return this.grpcPort == other.grpcPort && this.httpPort == other.httpPort && this.isLocal == other.isLocal
&& this.metricsPort == other.metricsPort && Objects.equals(this.properties, other.properties);
&& this.metricsPort == other.metricsPort && this.timeout == other.timeout
&& this.nRetries == other.nRetries && Objects.equals(this.properties, other.properties);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*******************************************************************************
* Copyright (c) 2022 Eurotech and/or its affiliates and others
*
* This program and the accompanying materials are made
* available under the terms of the Eclipse Public License 2.0
* which is available at https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* Eurotech
******************************************************************************/

package org.eclipse.kura.ai.triton.server;
mattdibi marked this conversation as resolved.
Show resolved Hide resolved

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.util.HashMap;
import java.util.Map;

import org.eclipse.kura.core.linux.executor.LinuxSignal;
import org.eclipse.kura.executor.CommandExecutorService;
import org.junit.Test;

public class TritonServerLocalManagerTest {

private static final String[] TRITONSERVERCMD = new String[] { "tritonserver" };
private static final String MOCK_DECRYPT_FOLDER = "test";

private Map<String, Object> properties = new HashMap<>();
private TritonServerServiceOptions options = new TritonServerServiceOptions(properties);

private CommandExecutorService ces;
private TritonServerLocalManager manager;

@Test
public void killMethodShouldWork() {
givenPropertyWith("server.address", "localhost");
givenPropertyWith("server.ports", new Integer[] { 4000, 4001, 4002 });
givenPropertyWith("enable.local", Boolean.TRUE);
givenServiceOptionsBuiltWith(properties);

givenMockCommandExecutionService();
givenMockCommandExecutionServiceReturnsTritonIsRunning();

givenLocalManagerBuiltWith(this.options, this.ces, MOCK_DECRYPT_FOLDER);

whenKillIsCalled();

thenCommandServiceKillWasCalledWith(TRITONSERVERCMD, LinuxSignal.SIGKILL);
}

@Test
public void stopMethodShouldWork() {
givenPropertyWith("server.address", "localhost");
givenPropertyWith("server.ports", new Integer[] { 4000, 4001, 4002 });
givenPropertyWith("enable.local", Boolean.TRUE);
givenServiceOptionsBuiltWith(properties);

givenMockCommandExecutionService();
givenMockCommandExecutionServiceReturnsTritonIsRunning();

givenLocalManagerBuiltWith(this.options, this.ces, MOCK_DECRYPT_FOLDER);

whenStopIsCalled();

thenCommandServiceKillWasCalledWith(TRITONSERVERCMD, LinuxSignal.SIGINT);
}

/*
* Given
*/
private void givenPropertyWith(String name, Object value) {
this.properties.put(name, value);
}

private void givenServiceOptionsBuiltWith(Map<String, Object> properties) {
this.options = new TritonServerServiceOptions(properties);
}

private void givenMockCommandExecutionService() {
this.ces = mock(CommandExecutorService.class);
}

private void givenMockCommandExecutionServiceReturnsTritonIsRunning() {
when(this.ces.isRunning(new String[] { "tritonserver" })).thenReturn(true);
}

private void givenLocalManagerBuiltWith(TritonServerServiceOptions options, CommandExecutorService ces,
String decryptionFolder) {
this.manager = new TritonServerLocalManager(options, ces, decryptionFolder);
}

/*
* When
*/
private void whenKillIsCalled() {
this.manager.kill();
}

private void whenStopIsCalled() {
this.manager.stop();
}

/*
* Then
*/
private void thenCommandServiceKillWasCalledWith(String[] expectedCmd, LinuxSignal expectedSignal) {
verify(this.ces, times(1)).kill(expectedCmd, expectedSignal);
}
}
Loading