Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rolling Update Fixes #295

Merged
merged 32 commits into from
Nov 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
8f22fdd
initial commit for overhaul on managing rolling updates
ukclivecox Nov 4, 2018
390f9a5
add new test for rolling update form 1 component to 2 in 2 deployemnts
ukclivecox Nov 5, 2018
c30ee00
merge master
ukclivecox Nov 10, 2018
8cff162
Merge branch 'python_versions' into deployment_hashes
ukclivecox Nov 10, 2018
d1e3db5
increase readiness failure count and updates for testing
ukclivecox Nov 10, 2018
7313407
add end-2-end tests
ukclivecox Nov 11, 2018
83b3cdf
Update tests
ukclivecox Nov 11, 2018
642a962
Merge branch 'master' into deployment_hashes
ukclivecox Nov 11, 2018
aba157b
comments about powermock and sure fire in pom
ukclivecox Nov 12, 2018
a6adf10
check for duplicate predictor names
ukclivecox Nov 12, 2018
51abe61
make ready boolean AtomicBoolean
ukclivecox Nov 12, 2018
16496b8
add more tests for bad graphs
ukclivecox Nov 12, 2018
32c441b
add port forward to test and updated helm chart to wait only for svc-…
ukclivecox Nov 12, 2018
6baeae0
updates from testing
ukclivecox Nov 15, 2018
9bb6983
merge custom metric fixes
ukclivecox Nov 15, 2018
f2f46c9
remove PowerMock dependency
ukclivecox Nov 16, 2018
efcff59
Add tests for deployment watcher
ukclivecox Nov 16, 2018
a08cd6a
Make service orchestrator same deployment by default but allow separa…
ukclivecox Nov 18, 2018
67af7a0
allow engine conect retries and enginer runas user to be confgurable
ukclivecox Nov 19, 2018
1dc1725
update load tester to include microservice testing and mnist grpc
ukclivecox Nov 19, 2018
e6c958a
ensure shared managed channel used by grpc calls
ukclivecox Nov 19, 2018
b162d44
add test for grpc channel manager
ukclivecox Nov 19, 2018
847cfe7
add further tests for grpc channel handler
ukclivecox Nov 19, 2018
3243e35
revert version to 0.2.5-SNAPSHOT in helm seldon-core
ukclivecox Nov 19, 2018
ca7cdbe
Updates for deleting deployments and services in two stages if possible
ukclivecox Nov 21, 2018
4eed789
Update waits for prestop handling
ukclivecox Nov 22, 2018
5667f81
add missing send-feedback endpoint for model in R
ukclivecox Nov 22, 2018
4d7a37b
Merge branch 'master' into deployment_hashes
ukclivecox Nov 26, 2018
fb36ab3
update example helm chart
ukclivecox Nov 26, 2018
5c383fa
update exampe notenooks
ukclivecox Nov 26, 2018
384d9c3
fix release script for ksonnet versions
ukclivecox Nov 26, 2018
3cdd652
remove powermock reference in cluster manager pom
ukclivecox Nov 26, 2018
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 25 additions & 15 deletions cluster-manager/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>1.5.1.RELEASE</version>
<version>1.5.17.RELEASE</version>
</parent>

<properties>
Expand All @@ -37,12 +37,17 @@

<dependencies>

<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
<exclusions>
<exclusion> <!-- Excluded due to nn standard license -->
<groupId>org.json</groupId>
<artifactId>json</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>io.grpc</groupId>
Expand Down Expand Up @@ -71,21 +76,14 @@
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>

<dependency>
<groupId>io.fabric8</groupId>
<artifactId>kubernetes-client</artifactId>
<version>2.0.0</version>
</dependency>

<dependency>
<groupId>io.kubernetes</groupId>
<artifactId>client-java</artifactId>
<version>3.0.0</version>
<scope>compile</scope>
</dependency>



</dependencies>

<build>
Expand Down Expand Up @@ -214,6 +212,18 @@
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
<plugin>
<!--
Needed due to https://issues.apache.org/jira/browse/SUREFIRE-1588
Should be removed when fixed
-->

<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<useSystemClassLoader>false</useSystemClassLoader>
</configuration>
</plugin>
</plugins>
</build>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public class ClusterManagerProperites {
private String engineContainerServiceAccountName = "default";
private int puContainerPortBase;
private String namespace;
private int engineUser = 8889;

public int getEngineContainerPort() {
return engineContainerPort;
Expand Down Expand Up @@ -84,6 +85,14 @@ public void setNamespace(String namespace) {
this.namespace = namespace;
}

public int getEngineUser() {
return engineUser;
}

public void setEngineUser(int engineUser) {
this.engineUser = engineUser;
}

@Override
public String toString() {
return ReflectionToStringBuilder.toString(this, ToStringStyle.SHORT_PREFIX_STYLE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import java.io.InputStreamReader;
import java.lang.reflect.Type;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
Expand All @@ -16,6 +15,8 @@

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import com.google.gson.reflect.TypeToken;

Expand All @@ -26,14 +27,23 @@
import io.kubernetes.client.ProgressRequestBody;
import io.kubernetes.client.ProgressResponseBody;
import io.kubernetes.client.models.V1beta1CustomResourceDefinition;
import io.kubernetes.client.util.Config;
import io.seldon.clustermanager.k8s.client.K8sClientProvider;

@Component
public class CRDCreator {
protected static Logger logger = LoggerFactory.getLogger(CRDCreator.class.getName());

private final K8sClientProvider k8sClientProvider;

@Autowired
public CRDCreator(K8sClientProvider k8sClientProvider) {
super();
this.k8sClientProvider = k8sClientProvider;
}
public void createCRD() throws IOException, ApiException
{
String jsonStr = readFileFromClasspath("crd.json");
ApiClient client = Config.defaultClient();
ApiClient client = k8sClientProvider.getClient();
try {
createCustomResourceDefinition(client,jsonStr.getBytes(),null);
logger.info("Created CRD");
Expand Down Expand Up @@ -79,7 +89,7 @@ private String readFile(String path, Charset encoding)
return new String(encoded, encoding);
}

private V1beta1CustomResourceDefinition createCustomResourceDefinition(ApiClient apiClient,byte[] body, String pretty)
protected V1beta1CustomResourceDefinition createCustomResourceDefinition(ApiClient apiClient,byte[] body, String pretty)
throws ApiException {
ApiResponse<V1beta1CustomResourceDefinition> resp = createCustomResourceDefinitionWithHttpInfo(apiClient,body, pretty);
return resp.getData();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@
package io.seldon.clustermanager.k8s;

public class Constants {
public static final String LABEL_SELDON_ID = "seldon-deployment-id";
public static final String LABEL_SELDON_ID = "seldon-deployment-id";
public static final String LABEL_SELDON_SVCORCH = "seldon-deployment-contains-svcorch";
public static final String STATE_CREATING = "Creating";
public static final String STATE_FAILED = "Failed";
public static final String STATE_AVAILABLE = "Available";

public static final String ENGINE_JAVA_OPTS_ANNOTATION = "seldon.io/engine-java-opts";
public static final String ENGINE_SEPARATE_ANNOTATION = "seldon.io/engine-separate-pod";
public static final String REST_READ_TIMEOUT_ANNOTATION = "seldon.io/rest-read-timeout";
public static final String GRPC_READ_TIMEOUT_ANNOTATION = "seldon.io/grpc-read-timeout";
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,13 @@
import io.kubernetes.client.ApiException;
import io.kubernetes.client.apis.ExtensionsV1beta1Api;
import io.kubernetes.client.models.ExtensionsV1beta1Deployment;
import io.kubernetes.client.models.ExtensionsV1beta1DeploymentStatus;
import io.kubernetes.client.models.V1OwnerReference;
import io.kubernetes.client.util.Config;
import io.kubernetes.client.util.Watch;
import io.seldon.clustermanager.ClusterManagerProperites;
import io.seldon.clustermanager.k8s.client.K8sApiProvider;
import io.seldon.clustermanager.k8s.client.K8sClientProvider;

@Component
public class DeploymentWatcher {
Expand All @@ -48,13 +51,17 @@ public class DeploymentWatcher {
private int resourceVersionProcessed = 0;

private final SeldonDeploymentStatusUpdate statusUpdater;
private final K8sClientProvider k8sClientProvider;
private final K8sApiProvider k8sApiProvider;
private final String namespace;

@Autowired
public DeploymentWatcher(ClusterManagerProperites clusterManagerProperites,SeldonDeploymentStatusUpdate statusUpdater)
public DeploymentWatcher(K8sApiProvider k8sApiProvider,K8sClientProvider k8sClientProvider,ClusterManagerProperites clusterManagerProperites,SeldonDeploymentStatusUpdate statusUpdater)
{
this.statusUpdater = statusUpdater;
this.namespace = StringUtils.isEmpty(clusterManagerProperites.getNamespace()) ? "default" : clusterManagerProperites.getNamespace();
this.k8sClientProvider = k8sClientProvider;
this.k8sApiProvider = k8sApiProvider;
}

public int watchDeployments(int resourceVersion,int resourceVersionProcessed) throws ApiException, IOException
Expand All @@ -66,8 +73,8 @@ public int watchDeployments(int resourceVersion,int resourceVersionProcessed) th

int maxResourceVersion = resourceVersion;

ApiClient client = Config.defaultClient();
ExtensionsV1beta1Api api = new ExtensionsV1beta1Api(client);
ApiClient client = k8sClientProvider.getClient();
ExtensionsV1beta1Api api = k8sApiProvider.getExtensionsV1beta1Api(client);

Watch<ExtensionsV1beta1Deployment> watch = Watch.createWatch(
client,
Expand All @@ -77,6 +84,11 @@ public int watchDeployments(int resourceVersion,int resourceVersionProcessed) th
try
{
for (Watch.Response<ExtensionsV1beta1Deployment> item : watch) {
if (item.object == null)
{
logger.warn("Bad watch returned will reset resource version type:{} status:{} ",item.type,item.status.toString());
return 0;
}
int resourceVersionNew = Integer.parseInt(item.object.getMetadata().getResourceVersion());
if (resourceVersionNew <= resourceVersionProcessed)
{
Expand All @@ -102,6 +114,8 @@ public int watchDeployments(int resourceVersion,int resourceVersionProcessed) th
{
String mlDepName = ownerRef.getName();
String depName = item.object.getMetadata().getName();
ExtensionsV1beta1DeploymentStatus status = item.object.getStatus();
logger.info("{} {} {} replicas:{} replicasAvailable(ready):{} replicasUnavilable:{} replicasReady(available):{}",item.type,mlDepName,depName,status.getReplicas(),status.getReadyReplicas(),status.getUnavailableReplicas(),status.getAvailableReplicas());
statusUpdater.updateStatus(mlDepName, depName, item.object.getStatus().getReplicas(),item.object.getStatus().getReadyReplicas());
}
}
Expand All @@ -113,6 +127,8 @@ public int watchDeployments(int resourceVersion,int resourceVersionProcessed) th
{
String mlDepName = ownerRef.getName();
String depName = item.object.getMetadata().getName();
ExtensionsV1beta1DeploymentStatus status = item.object.getStatus();
logger.info("{} {} {} replicas:{} replicasAvailable(ready):{} replicasUnavilable:{} replicasReady(available):{}",item.type,mlDepName,depName,status.getReplicas(),status.getReadyReplicas(),status.getUnavailableReplicas(),status.getAvailableReplicas());
statusUpdater.removeStatus(mlDepName,depName);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
import io.kubernetes.client.proto.Meta.ObjectMeta;
import io.kubernetes.client.util.Config;
import io.seldon.clustermanager.ClusterManagerProperites;
import io.seldon.clustermanager.k8s.client.K8sApiProvider;
import io.seldon.clustermanager.k8s.client.K8sClientProvider;
import io.seldon.protos.DeploymentProtos.SeldonDeployment;

@Component
Expand All @@ -54,9 +56,14 @@ public class KubeCRDHandlerImpl implements KubeCRDHandler {

private boolean replaceStatusResource = true; // Whether to use the status CR endpoint (available from k8s 1.10 (alpha) 1.11 (beta)

private final K8sClientProvider k8sClientProvider;
private final K8sApiProvider k8sApiProvider;

@Autowired
public KubeCRDHandlerImpl(ClusterManagerProperites clusterManagerProperites) {
public KubeCRDHandlerImpl(K8sApiProvider k8sApiProvider,K8sClientProvider k8sClientProvider,ClusterManagerProperites clusterManagerProperites) {
this.namespace = StringUtils.isEmpty(clusterManagerProperites.getNamespace()) ? "default" : clusterManagerProperites.getNamespace();
this.k8sClientProvider= k8sClientProvider;
this.k8sApiProvider = k8sApiProvider;
}

@Override
Expand All @@ -80,7 +87,6 @@ public void updateRaw(String json,String seldonDeploymentName) {

@Override
public void updateSeldonDeploymentStatus(SeldonDeployment mldep) {

try
{
// Need to remove resourceVersion from the representation used for last-applied-configuration otherwise you will errors subsequently using kubectl
Expand All @@ -95,9 +101,9 @@ public void updateSeldonDeploymentStatus(SeldonDeployment mldep) {
.putAnnotations("kubectl.kubernetes.io/last-applied-configuration", json+"\n")).build();
json = SeldonDeploymentUtils.toJson(mlDeployment,false);

logger.debug("Updating seldondeployment "+mlDeployment.getMetadata().getName());
ApiClient client = Config.defaultClient();
CustomObjectsApi api = new CustomObjectsApi(client);
logger.debug("Updating seldondeployment {} with status {}",mlDeployment.getMetadata().getName(),mlDeployment.getStatus());
ApiClient client = k8sClientProvider.getClient();
CustomObjectsApi api = k8sApiProvider.getCustomObjectsApi(client);
if (replaceStatusResource)
{
try
Expand All @@ -124,8 +130,8 @@ public void updateSeldonDeploymentStatus(SeldonDeployment mldep) {
public SeldonDeployment getSeldonDeployment(String name) {
try
{
ApiClient client = Config.defaultClient();
CustomObjectsApi api = new CustomObjectsApi(client);
ApiClient client = k8sClientProvider.getClient();
CustomObjectsApi api = k8sApiProvider.getCustomObjectsApi(client);
Object resp = api.getNamespacedCustomObject(GROUP, VERSION, namespace, KIND_PLURAL, name);
Gson gson = new GsonBuilder().create();
String json = gson.toJson(resp);
Expand All @@ -148,7 +154,7 @@ public SeldonDeployment getSeldonDeployment(String name) {
public ExtensionsV1beta1DeploymentList getOwnedDeployments(String seldonDeploymentName) {
try
{
ApiClient client = Config.defaultClient();
ApiClient client = k8sClientProvider.getClient();
ExtensionsV1beta1Api api = new ExtensionsV1beta1Api(client);
ExtensionsV1beta1DeploymentList l = api.listNamespacedDeployment(namespace, null, null, null, false, Constants.LABEL_SELDON_ID+"="+seldonDeploymentName, null, null, null, false);
return l;
Expand All @@ -165,7 +171,7 @@ public ExtensionsV1beta1DeploymentList getOwnedDeployments(String seldonDeployme
public V1ServiceList getOwnedServices(String seldonDeploymentName) {
try
{
ApiClient client = Config.defaultClient();
ApiClient client = k8sClientProvider.getClient();
io.kubernetes.client.apis.CoreV1Api api = new CoreV1Api(client);
V1ServiceList l = api.listNamespacedService(namespace, null, null, null, false, Constants.LABEL_SELDON_ID+"="+seldonDeploymentName, null, null, null, null);
return l;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,6 @@

public interface SeldonDeploymentController {
public void createOrReplaceSeldonDeployment(SeldonDeployment mlDep);
public void removeInitialUnusedResources(SeldonDeployment mlDep);
public void removeAllUnusedResources(SeldonDeployment mlDep);
}
Loading