From 7397140cb2d97adfb9acdca05e5de51d34bbd54a Mon Sep 17 00:00:00 2001 From: Dan Crankshaw Date: Fri, 11 May 2018 20:54:37 +0000 Subject: [PATCH 01/63] started impl, but need to dynamically set labels for prom and svcs --- .../docker/docker_container_manager.py | 17 +++++++--- .../kubernetes_container_manager.py | 32 +++++++++++++++++-- .../kubernetes/kubernetes_metric_utils.py | 20 ++++++------ 3 files changed, 52 insertions(+), 17 deletions(-) diff --git a/clipper_admin/clipper_admin/docker/docker_container_manager.py b/clipper_admin/clipper_admin/docker/docker_container_manager.py index e645682c5..a220315aa 100644 --- a/clipper_admin/clipper_admin/docker/docker_container_manager.py +++ b/clipper_admin/clipper_admin/docker/docker_container_manager.py @@ -22,6 +22,7 @@ class DockerContainerManager(ContainerManager): def __init__(self, + cluster_name, docker_ip_address="localhost", clipper_query_port=1337, clipper_management_port=1338, @@ -34,6 +35,9 @@ def __init__(self, """ Parameters ---------- + cluster_name : str + A unique name for this Clipper cluster. This can be used to run multiple Clipper + clusters on the same node without interfering with each other. docker_ip_address : str, optional The public hostname or IP address at which the Clipper Docker containers can be accessed via their exposed ports. This should almost always @@ -58,6 +62,7 @@ def __init__(self, Any additional keyword arguments to pass to the call to :py:meth:`docker.client.containers.run`. """ + self.cluster_name = cluster_name self.public_hostname = docker_ip_address self.clipper_query_port = clipper_query_port self.clipper_management_port = clipper_management_port @@ -82,9 +87,9 @@ def __init__(self, # Merge Clipper-specific labels with any user-provided labels if "labels" in self.extra_container_kwargs: self.common_labels = self.extra_container_kwargs.pop("labels") - self.common_labels.update({CLIPPER_DOCKER_LABEL: ""}) + self.common_labels.update({CLIPPER_DOCKER_LABEL: self.cluster_name}) else: - self.common_labels = {CLIPPER_DOCKER_LABEL: ""} + self.common_labels = {CLIPPER_DOCKER_LABEL: self.cluster_name} container_args = { "network": self.docker_network, @@ -285,7 +290,9 @@ def set_num_replicas(self, name, version, input_type, image, num_replicas): def get_logs(self, logging_dir): containers = self.docker_client.containers.list( filters={ - "label": CLIPPER_DOCKER_LABEL + "label": "{key}={val}".format( + key=CLIPPER_DOCKER_LABEL, + val=self.cluster_name) }) logging_dir = os.path.abspath(os.path.expanduser(logging_dir)) @@ -328,7 +335,9 @@ def stop_all_model_containers(self): def stop_all(self): containers = self.docker_client.containers.list( filters={ - "label": CLIPPER_DOCKER_LABEL + "label": "{key}={val}".format( + key=CLIPPER_DOCKER_LABEL, + val=self.cluster_name) }) for c in containers: c.stop() diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index 0f27da058..87db6cffd 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -5,7 +5,7 @@ CLIPPER_INTERNAL_MANAGEMENT_PORT, CLIPPER_INTERNAL_QUERY_PORT, CLIPPER_INTERNAL_METRIC_PORT) from ..exceptions import ClipperException -from .kubernetes_metric_utils import start_prometheus, CLIPPER_FRONTEND_EXPORTER_IMAGE +# from .kubernetes_metric_utils import start_prometheus, CLIPPER_FRONTEND_EXPORTER_IMAGE from contextlib import contextmanager from kubernetes import client, config @@ -38,14 +38,19 @@ def _pass_conflicts(): class KubernetesContainerManager(ContainerManager): def __init__(self, + cluster_name, kubernetes_proxy_addr=None, redis_ip=None, redis_port=6379, - useInternalIP=False): + useInternalIP=False, + kubernetes_namespace="default"): """ Parameters ---------- + cluster_name : str + A unique name for this Clipper cluster. This can be used to run multiple Clipper + clusters on the same Kubernetes cluster without interfering with each other. kubernetes_proxy_addr : str, optional The proxy address if you are proxying connections locally using ``kubectl proxy``. If this argument is provided, Clipper will construct the appropriate proxy @@ -62,6 +67,9 @@ def __init__(self, throw an exception if none of the nodes have ExternalDNS. If ``useInternalIP`` is set to true, Clipper will use the Internal IP of the K8S node if no ExternalDNS exists for any of the nodes. + kubernetes_namespace : str, optional + Which Kubernetes namespace to start the Clipper cluster in. The namespace must already exist. + If no namespace is provided, Clipper will be started in the default namespace. Note ---- @@ -71,6 +79,8 @@ def __init__(self, letting Clipper launch one for you. """ + self.cluster_name = cluster_name + if kubernetes_proxy_addr is not None: self.kubernetes_proxy_addr = kubernetes_proxy_addr self.use_k8s_proxy = True @@ -85,6 +95,7 @@ def __init__(self, configuration.assert_hostname = False self._k8s_v1 = client.CoreV1Api() self._k8s_beta = client.ExtensionsV1beta1Api() + self.namespace = kubernetes_namespace def start_clipper(self, query_frontend_image, @@ -179,10 +190,25 @@ def start_clipper(self, self._k8s_v1.create_namespaced_service( body=body, namespace='default') - start_prometheus(self._k8s_v1, self._k8s_beta) + self._start_prometheus() self.connect() + def _start_prometheus(self): + prom_deployment_path = os.path.join(cur_dir, 'prom_deployment.yaml') + prom_service_path = os.path.join(cur_dir, 'prom_service.yaml') + prom_configmap_path = os.path.join(cur_dir, 'prom_configmap.yaml') + frontend_exporter_deployment_path = os.path.join( + cur_dir, 'frontend-exporter-deployment.yaml') + + with open(prom_configmap_path, 'r') as f: + configmap_body = yaml.load(f) + configmap_body["metadata"]["labels"] + + with _pass_conflicts(): + self._k8s_v1.create_namespaced_config_map(body=configmap_body, namespace=self.namespace) + + def connect(self): nodes = self._k8s_v1.list_node() diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_metric_utils.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_metric_utils.py index fc3d0bc18..ff9dac2ae 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_metric_utils.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_metric_utils.py @@ -34,15 +34,15 @@ def _pass_conflicts(): raise e -def _create_prometheus_configmap(_k8s_v1): +def _create_prometheus_configmap(_k8s_v1, namespace): with open(prom_configmap_path, 'r') as f: data = yaml.load(f) with _pass_conflicts(): - _k8s_v1.create_namespaced_config_map(body=data, namespace='default') + _k8s_v1.create_namespaced_config_map(body=data, namespace=namespace) -def _create_prometheus_deployment(_k8s_beta): +def _create_prometheus_deployment(_k8s_beta, namespace): with open(prom_deployment_path, 'r') as f: data = yaml.load(f) @@ -50,18 +50,18 @@ def _create_prometheus_deployment(_k8s_beta): 'image'] = "prom/prometheus:{version}".format(version=PROM_VERSION) with _pass_conflicts(): - _k8s_beta.create_namespaced_deployment(body=data, namespace='default') + _k8s_beta.create_namespaced_deployment(body=data, namespace=namespace) -def _create_prometheus_service(_k8s_v1): +def _create_prometheus_service(_k8s_v1, namespace): with open(prom_service_path, 'r') as f: data = yaml.load(f) with _pass_conflicts(): - _k8s_v1.create_namespaced_service(body=data, namespace='default') + _k8s_v1.create_namespaced_service(body=data, namespace=namespace) -def start_prometheus(_k8s_v1, _k8s_beta): - _create_prometheus_configmap(_k8s_v1) - _create_prometheus_deployment(_k8s_beta) - _create_prometheus_service(_k8s_v1) +def start_prometheus(_k8s_v1, _k8s_beta, namespace): + _create_prometheus_configmap(_k8s_v1, namespace) + _create_prometheus_deployment(_k8s_beta, namespace) + _create_prometheus_service(_k8s_v1, namespace) From 82fa65856c65f02266b450be980915050e59c960 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 22 May 2018 10:03:11 -0700 Subject: [PATCH 02/63] Added unbound port func, still debugging --- .../clipper_admin/container_manager.py | 41 +++++++++++++++++++ .../docker/docker_container_manager.py | 17 +++++--- integration-tests/multi_tenancy_docker.py | 13 ++++++ 3 files changed, 65 insertions(+), 6 deletions(-) create mode 100644 integration-tests/multi_tenancy_docker.py diff --git a/clipper_admin/clipper_admin/container_manager.py b/clipper_admin/clipper_admin/container_manager.py index 52027178f..978f73404 100644 --- a/clipper_admin/clipper_admin/container_manager.py +++ b/clipper_admin/clipper_admin/container_manager.py @@ -1,5 +1,7 @@ import abc from .exceptions import ClipperException +import random +import socket # Constants CLIPPER_INTERNAL_QUERY_PORT = 1337 @@ -34,6 +36,45 @@ def parse_model_container_label(label): return splits +def find_unbound_port(start=None, increment=False, port_range=(34256, 50000), verbose=False, logger=None): + """ + Fina a unbound port. + + Parameters + ---------- + start : int + The port number to start with. If this port is unbounded, return this port. + If None, start will be a random port. + increment : bool + If True, find port by incrementing start port; else, random search. + port_range : tuple + The range of port for random number generation + verbose : bool + Verbose flag for logging + logger: logging.Logger + """ + while True: + if not start: + start = random.randint(*port_range) + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + sock.bind(("127.0.0.1", start)) + # Make sure we clean up after binding + del sock + return start + except socket.error as e: + if verbose and logger: + logger.info("Socket error: {}".format(e)) + logger.info( + "randomly generated port %d is bound. Trying again." % start) + + if increment: + start += 1 + else: + start = random.randint(*port_range) + + class ContainerManager(object): __metaclass__ = abc.ABCMeta diff --git a/clipper_admin/clipper_admin/docker/docker_container_manager.py b/clipper_admin/clipper_admin/docker/docker_container_manager.py index a220315aa..95d2c5275 100644 --- a/clipper_admin/clipper_admin/docker/docker_container_manager.py +++ b/clipper_admin/clipper_admin/docker/docker_container_manager.py @@ -7,7 +7,7 @@ import time import json from ..container_manager import ( - create_model_container_label, parse_model_container_label, + create_model_container_label, parse_model_container_label, find_unbound_port, ContainerManager, CLIPPER_DOCKER_LABEL, CLIPPER_MODEL_CONTAINER_LABEL, CLIPPER_QUERY_FRONTEND_CONTAINER_LABEL, CLIPPER_MGMT_FRONTEND_CONTAINER_LABEL, CLIPPER_INTERNAL_RPC_PORT, @@ -64,16 +64,16 @@ def __init__(self, """ self.cluster_name = cluster_name self.public_hostname = docker_ip_address - self.clipper_query_port = clipper_query_port - self.clipper_management_port = clipper_management_port - self.clipper_rpc_port = clipper_rpc_port + self.clipper_query_port = find_unbound_port(start=clipper_query_port, increment=True) + self.clipper_management_port = find_unbound_port(start=clipper_management_port, increment=True) + self.clipper_rpc_port = find_unbound_port(start=clipper_rpc_port, increment=True) self.redis_ip = redis_ip if redis_ip is None: self.external_redis = False else: self.external_redis = True - self.redis_port = redis_port - self.prometheus_port = prometheus_port + self.redis_port = find_unbound_port(start=redis_port, increment=True) + self.prometheus_port = find_unbound_port(start=prometheus_port, increment=True) if docker_network is "host": raise ClipperException( "DockerContainerManager does not support running Clipper on the " @@ -120,6 +120,11 @@ def start_clipper(self, msg = "Unable to Connect to Docker. Please Check if Docker is running." raise ClipperException(msg) + + + + + if not self.external_redis: logger.info("Starting managed Redis instance in Docker") redis_container = self.docker_client.containers.run( diff --git a/integration-tests/multi_tenancy_docker.py b/integration-tests/multi_tenancy_docker.py new file mode 100644 index 000000000..ed8345058 --- /dev/null +++ b/integration-tests/multi_tenancy_docker.py @@ -0,0 +1,13 @@ +from __future__ import absolute_import, division, print_function +import os +import sys +import requests +import json +import tempfile +import shutil +import numpy as np +import time +import logging +from test_utils import (create_docker_connection, BenchmarkException, + fake_model_data, headers, log_clipper_state) + From b79d36db87de907af61c473757b06be308da5085 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 22 May 2018 12:26:28 -0700 Subject: [PATCH 03/63] Remove all occurance of tmp in metrics Resolve issue #448 --- clipper_admin/clipper_admin/clipper_admin.py | 2 +- .../clipper_admin/container_manager.py | 2 +- .../docker/docker_container_manager.py | 32 +++++++++------ .../docker/docker_metric_utils.py | 39 ++++++++----------- 4 files changed, 38 insertions(+), 37 deletions(-) diff --git a/clipper_admin/clipper_admin/clipper_admin.py b/clipper_admin/clipper_admin/clipper_admin.py index 373101e70..5ffa2fbd8 100644 --- a/clipper_admin/clipper_admin/clipper_admin.py +++ b/clipper_admin/clipper_admin/clipper_admin.py @@ -30,7 +30,7 @@ DEFAULT_LABEL = [] DEFAULT_PREDICTION_CACHE_SIZE_BYTES = 33554432 -CLIPPER_TEMP_DIR = "/tmp/clipper" +CLIPPER_TEMP_DIR = "/tmp/clipper" # Used Internally for Test; Not Windows Compatible logging.basicConfig( format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', diff --git a/clipper_admin/clipper_admin/container_manager.py b/clipper_admin/clipper_admin/container_manager.py index 978f73404..8401b8281 100644 --- a/clipper_admin/clipper_admin/container_manager.py +++ b/clipper_admin/clipper_admin/container_manager.py @@ -36,7 +36,7 @@ def parse_model_container_label(label): return splits -def find_unbound_port(start=None, increment=False, port_range=(34256, 50000), verbose=False, logger=None): +def find_unbound_port(start=None, increment=True, port_range=(34256, 50000), verbose=False, logger=None): """ Fina a unbound port. diff --git a/clipper_admin/clipper_admin/docker/docker_container_manager.py b/clipper_admin/clipper_admin/docker/docker_container_manager.py index 95d2c5275..854a49900 100644 --- a/clipper_admin/clipper_admin/docker/docker_container_manager.py +++ b/clipper_admin/clipper_admin/docker/docker_container_manager.py @@ -64,16 +64,16 @@ def __init__(self, """ self.cluster_name = cluster_name self.public_hostname = docker_ip_address - self.clipper_query_port = find_unbound_port(start=clipper_query_port, increment=True) - self.clipper_management_port = find_unbound_port(start=clipper_management_port, increment=True) - self.clipper_rpc_port = find_unbound_port(start=clipper_rpc_port, increment=True) + self.clipper_query_port = clipper_query_port + self.clipper_management_port = clipper_management_port + self.clipper_rpc_port = clipper_rpc_port self.redis_ip = redis_ip if redis_ip is None: self.external_redis = False else: self.external_redis = True - self.redis_port = find_unbound_port(start=redis_port, increment=True) - self.prometheus_port = find_unbound_port(start=prometheus_port, increment=True) + self.redis_port = redis_port + self.prometheus_port = prometheus_port if docker_network is "host": raise ClipperException( "DockerContainerManager does not support running Clipper on the " @@ -98,6 +98,8 @@ def __init__(self, self.extra_container_kwargs.update(container_args) + self.prom_config_path = tempfile.NamedTemporaryFile('w', suffix='.yml', delete=False).name + def start_clipper(self, query_frontend_image, mgmt_frontend_image, @@ -121,12 +123,9 @@ def start_clipper(self, raise ClipperException(msg) - - - - if not self.external_redis: logger.info("Starting managed Redis instance in Docker") + self.redis_port = find_unbound_port(self.redis_port) redis_container = self.docker_client.containers.run( 'redis:alpine', "redis-server --port %s" % self.redis_port, @@ -141,6 +140,7 @@ def start_clipper(self, redis_ip=self.redis_ip, redis_port=self.redis_port) mgmt_labels = self.common_labels.copy() mgmt_labels[CLIPPER_MGMT_FRONTEND_CONTAINER_LABEL] = "" + self.clipper_management_port = find_unbound_port(self.clipper_management_port) self.docker_client.containers.run( mgmt_frontend_image, mgmt_cmd, @@ -162,6 +162,8 @@ def start_clipper(self, query_labels[CLIPPER_QUERY_FRONTEND_CONTAINER_LABEL] = "" query_container_id = random.randint(0, 100000) query_name = "query_frontend-{}".format(query_container_id) + self.clipper_query_port = find_unbound_port(self.clipper_query_port) + self.clipper_rpc_port = find_unbound_port(self.clipper_rpc_port) self.docker_client.containers.run( query_frontend_image, query_cmd, @@ -181,9 +183,14 @@ def start_clipper(self, query_frontend_metric_name, self.docker_client, query_name, self.common_labels, self.extra_container_kwargs) setup_metric_config(query_frontend_metric_name, + self.prom_config_path, CLIPPER_INTERNAL_METRIC_PORT) - run_metric_image(self.docker_client, self.common_labels, - self.prometheus_port, self.extra_container_kwargs) + self.prometheus_port = find_unbound_port(self.prometheus_port) + run_metric_image(self.docker_client, + self.common_labels, + self.prometheus_port, + self.prom_config_path, + self.extra_container_kwargs) self.connect() @@ -249,6 +256,7 @@ def _add_replica(self, name, version, input_type, image): # Metric Section add_to_metric_config(model_container_name, + self.prom_config_path, CLIPPER_INTERNAL_METRIC_PORT) # Return model_container_name so we can check if it's up and running later @@ -290,7 +298,7 @@ def set_num_replicas(self, name, version, input_type, image, num_replicas): cur_container = current_replicas.pop() cur_container.stop() # Metric Section - delete_from_metric_config(cur_container.name) + delete_from_metric_config(cur_container.name, self.prom_config_path) def get_logs(self, logging_dir): containers = self.docker_client.containers.list( diff --git a/clipper_admin/clipper_admin/docker/docker_metric_utils.py b/clipper_admin/clipper_admin/docker/docker_metric_utils.py index abaab2e7c..086a3e100 100644 --- a/clipper_admin/clipper_admin/docker/docker_metric_utils.py +++ b/clipper_admin/clipper_admin/docker/docker_metric_utils.py @@ -2,6 +2,7 @@ import requests import random import os +import tempfile from ..exceptions import ClipperException from ..version import __version__ from ..container_manager import CLIPPER_INTERNAL_QUERY_PORT @@ -9,18 +10,6 @@ PROM_VERSION = "v2.1.0" -def ensure_clipper_tmp(): - """ - Make sure /tmp/clipper directory exist. If not, make one. - :return: None - """ - try: - os.makedirs('/tmp/clipper') - except OSError as e: - # Equivalent to os.makedirs(., exist_ok=True) in py3 - pass - - def get_prometheus_base_config(): """ Generate a basic configuration dictionary for prometheus @@ -57,17 +46,17 @@ def run_query_frontend_metric_image(name, docker_client, query_name, def setup_metric_config(query_frontend_metric_name, + prom_config_path, CLIPPER_INTERNAL_METRIC_PORT): """ Write to file prometheus.yml after frontend-metric is setup. :param query_frontend_metric_name: Corresponding image name + :param prom_config_path: Prometheus config file to write in :param CLIPPER_INTERNAL_METRIC_PORT: Default port. :return: None """ - ensure_clipper_tmp() - - with open('/tmp/clipper/prometheus.yml', 'w') as f: + with open(prom_config_path, 'w') as f: prom_config = get_prometheus_base_config() prom_config_query_frontend = { 'job_name': @@ -86,14 +75,17 @@ def setup_metric_config(query_frontend_metric_name, def run_metric_image(docker_client, common_labels, prometheus_port, - extra_container_kwargs): + prom_config_path, extra_container_kwargs): """ Run the prometheus image. :param docker_client: The docker client object :param common_labels: Labels to pass in + :param prom_config_path: Where config file lives :param extra_container_kwargs: Kwargs to pass in. :return: None """ + + # CMD comes from https://hub.docker.com/r/prom/prometheus/~/dockerfile/ metric_cmd = [ "--config.file=/etc/prometheus/prometheus.yml", "--storage.tsdb.path=/prometheus", @@ -108,7 +100,7 @@ def run_metric_image(docker_client, common_labels, prometheus_port, name="metric_frontend-{}".format(random.randint(0, 100000)), ports={'9090/tcp': prometheus_port}, volumes={ - '/tmp/clipper/prometheus.yml': { + prom_config_path: { 'bind': '/etc/prometheus/prometheus.yml', 'mode': 'ro' } @@ -117,10 +109,11 @@ def run_metric_image(docker_client, common_labels, prometheus_port, **extra_container_kwargs) -def add_to_metric_config(model_container_name, CLIPPER_INTERNAL_METRIC_PORT): +def add_to_metric_config(model_container_name, prom_config_path, CLIPPER_INTERNAL_METRIC_PORT): """ Add a new model container to the prometheus.yml configuration file. :param model_container_name: New model container name, need to be unique. + :param prom_config_path: Where prometheus config file lives :param CLIPPER_INTERNAL_METRIC_PORT: Default port :return: None @@ -128,7 +121,7 @@ def add_to_metric_config(model_container_name, CLIPPER_INTERNAL_METRIC_PORT): ------ :py:exc:`clipper.ClipperException` """ - with open('/tmp/clipper/prometheus.yml', 'r') as f: + with open(prom_config_path, 'r') as f: conf = yaml.load(f) for config in conf['scrape_configs']: @@ -150,19 +143,19 @@ def add_to_metric_config(model_container_name, CLIPPER_INTERNAL_METRIC_PORT): } conf['scrape_configs'].append(new_job_dict) - with open('/tmp/clipper/prometheus.yml', 'w') as f: + with open(prom_config_path, 'w') as f: yaml.dump(conf, f) requests.post('http://localhost:9090/-/reload') -def delete_from_metric_config(model_container_name): +def delete_from_metric_config(model_container_name, prom_config_path): """ Delete the stored model container from the prometheus.yml configuration file. :param model_container_name: the model container name to be deleted. :return: None """ - with open('/tmp/clipper/prometheus.yml', 'r') as f: + with open(prom_config_path, 'r') as f: conf = yaml.load(f) for i, config in enumerate(conf['scrape_configs']): @@ -170,7 +163,7 @@ def delete_from_metric_config(model_container_name): conf['scrape_configs'].pop(i) break - with open('/tmp/clipper/prometheus.yml', 'w') as f: + with open(prom_config_path, 'w') as f: yaml.dump(conf, f) requests.post('http://localhost:9090/-/reload') From 1306a4add413f0b92076e780a15281abb49af921 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 22 May 2018 12:30:20 -0700 Subject: [PATCH 04/63] Resolve hard coded prometheus path Should resolve issue #424 --- .../clipper_admin/docker/docker_container_manager.py | 4 +++- .../clipper_admin/docker/docker_metric_utils.py | 10 ++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/clipper_admin/clipper_admin/docker/docker_container_manager.py b/clipper_admin/clipper_admin/docker/docker_container_manager.py index 854a49900..e65e382b3 100644 --- a/clipper_admin/clipper_admin/docker/docker_container_manager.py +++ b/clipper_admin/clipper_admin/docker/docker_container_manager.py @@ -6,6 +6,7 @@ import random import time import json +import tempfile from ..container_manager import ( create_model_container_label, parse_model_container_label, find_unbound_port, ContainerManager, CLIPPER_DOCKER_LABEL, CLIPPER_MODEL_CONTAINER_LABEL, @@ -257,6 +258,7 @@ def _add_replica(self, name, version, input_type, image): # Metric Section add_to_metric_config(model_container_name, self.prom_config_path, + self.prometheus_port, CLIPPER_INTERNAL_METRIC_PORT) # Return model_container_name so we can check if it's up and running later @@ -298,7 +300,7 @@ def set_num_replicas(self, name, version, input_type, image, num_replicas): cur_container = current_replicas.pop() cur_container.stop() # Metric Section - delete_from_metric_config(cur_container.name, self.prom_config_path) + delete_from_metric_config(cur_container.name, self.prom_config_path, self.prometheus_port) def get_logs(self, logging_dir): containers = self.docker_client.containers.list( diff --git a/clipper_admin/clipper_admin/docker/docker_metric_utils.py b/clipper_admin/clipper_admin/docker/docker_metric_utils.py index 086a3e100..6c096143f 100644 --- a/clipper_admin/clipper_admin/docker/docker_metric_utils.py +++ b/clipper_admin/clipper_admin/docker/docker_metric_utils.py @@ -1,8 +1,6 @@ import yaml import requests import random -import os -import tempfile from ..exceptions import ClipperException from ..version import __version__ from ..container_manager import CLIPPER_INTERNAL_QUERY_PORT @@ -109,7 +107,7 @@ def run_metric_image(docker_client, common_labels, prometheus_port, **extra_container_kwargs) -def add_to_metric_config(model_container_name, prom_config_path, CLIPPER_INTERNAL_METRIC_PORT): +def add_to_metric_config(model_container_name, prom_config_path, prometheus_port, CLIPPER_INTERNAL_METRIC_PORT): """ Add a new model container to the prometheus.yml configuration file. :param model_container_name: New model container name, need to be unique. @@ -146,10 +144,10 @@ def add_to_metric_config(model_container_name, prom_config_path, CLIPPER_INTERNA with open(prom_config_path, 'w') as f: yaml.dump(conf, f) - requests.post('http://localhost:9090/-/reload') + requests.post('http://localhost:{prometheus_port}/-/reload'.format(prometheus_port=prometheus_port)) -def delete_from_metric_config(model_container_name, prom_config_path): +def delete_from_metric_config(model_container_name, prom_config_path, prometheus_port): """ Delete the stored model container from the prometheus.yml configuration file. :param model_container_name: the model container name to be deleted. @@ -166,4 +164,4 @@ def delete_from_metric_config(model_container_name, prom_config_path): with open(prom_config_path, 'w') as f: yaml.dump(conf, f) - requests.post('http://localhost:9090/-/reload') + requests.post('http://localhost:{prometheus_port}/-/reload'.format(prometheus_port=prometheus_port)) From 82974f2c02d6e5122ee360b77b1a61d57f2f8a67 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 22 May 2018 15:09:00 -0700 Subject: [PATCH 05/63] Add label selection to replicas --- .../docker/docker_container_manager.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/clipper_admin/clipper_admin/docker/docker_container_manager.py b/clipper_admin/clipper_admin/docker/docker_container_manager.py index e65e382b3..4411bd0eb 100644 --- a/clipper_admin/clipper_admin/docker/docker_container_manager.py +++ b/clipper_admin/clipper_admin/docker/docker_container_manager.py @@ -100,6 +100,7 @@ def __init__(self, self.extra_container_kwargs.update(container_args) self.prom_config_path = tempfile.NamedTemporaryFile('w', suffix='.yml', delete=False).name + self.prom_config_path = os.path.realpath(self.prom_config_path) # resolve symlink def start_clipper(self, query_frontend_image, @@ -212,10 +213,12 @@ def deploy_model(self, name, version, input_type, image, num_replicas=1): def _get_replicas(self, name, version): containers = self.docker_client.containers.list( filters={ - "label": - "{key}={val}".format( - key=CLIPPER_MODEL_CONTAINER_LABEL, - val=create_model_container_label(name, version)) + "label":[ + "{key}={val}".format(key=CLIPPER_DOCKER_LABEL, val=self.cluster_name), + "{key}={val}".format( + key=CLIPPER_MODEL_CONTAINER_LABEL, + val=create_model_container_label(name, version)) + ] }) return containers @@ -226,7 +229,10 @@ def _add_replica(self, name, version, input_type, image): containers = self.docker_client.containers.list( filters={ - "label": CLIPPER_QUERY_FRONTEND_CONTAINER_LABEL + "label": [ + "{key}={val}".format(key=CLIPPER_DOCKER_LABEL, val=self.cluster_name), + CLIPPER_QUERY_FRONTEND_CONTAINER_LABEL + ] }) if len(containers) < 1: logger.warning("No Clipper query frontend found.") From 4e81a8eb6048042d0f0283ed9c3eda82bf06aef0 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Mon, 28 May 2018 12:14:05 -0700 Subject: [PATCH 06/63] Docker Multi-tenancy done --- clipper_admin/clipper_admin/clipper_admin.py | 2 +- .../clipper_admin/container_manager.py | 19 ++- .../docker/docker_container_manager.py | 135 ++++++++++++------ .../docker/docker_metric_utils.py | 15 +- .../kubernetes_container_manager.py | 4 +- integration-tests/multi_tenancy_docker.py | 1 - 6 files changed, 124 insertions(+), 52 deletions(-) diff --git a/clipper_admin/clipper_admin/clipper_admin.py b/clipper_admin/clipper_admin/clipper_admin.py index 5ffa2fbd8..d6872b380 100644 --- a/clipper_admin/clipper_admin/clipper_admin.py +++ b/clipper_admin/clipper_admin/clipper_admin.py @@ -30,7 +30,7 @@ DEFAULT_LABEL = [] DEFAULT_PREDICTION_CACHE_SIZE_BYTES = 33554432 -CLIPPER_TEMP_DIR = "/tmp/clipper" # Used Internally for Test; Not Windows Compatible +CLIPPER_TEMP_DIR = "/tmp/clipper" # Used Internally for Test; Not Windows Compatible logging.basicConfig( format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', diff --git a/clipper_admin/clipper_admin/container_manager.py b/clipper_admin/clipper_admin/container_manager.py index 8401b8281..9b0702d83 100644 --- a/clipper_admin/clipper_admin/container_manager.py +++ b/clipper_admin/clipper_admin/container_manager.py @@ -8,6 +8,7 @@ CLIPPER_INTERNAL_MANAGEMENT_PORT = 1338 CLIPPER_INTERNAL_RPC_PORT = 7000 CLIPPER_INTERNAL_METRIC_PORT = 1390 +CLIPPER_INTERNAL_REDIS_PORT = 6379 CLIPPER_DOCKER_LABEL = "ai.clipper.container.label" CLIPPER_MODEL_CONTAINER_LABEL = "ai.clipper.model_container.label" @@ -16,6 +17,15 @@ CLIPPER_QUERY_FRONTEND_ID_LABEL = "ai.clipper.query_frontend.id" CONTAINERLESS_MODEL_IMAGE = "NO_CONTAINER" +CLIPPER_DOCKER_PORT_LABELS = { + 'redis': 'ai.clipper.redis.port', + 'query_query': 'ai.clipper.query_frontend.query.port', + 'query_rpc': 'ai.clipper.query_frontend.rpc.port', + 'management': 'ai.clipper.management.port', + 'metric': 'ai.clipper.metric.port' +} +CLIPPER_METRIC_CONFIG_LABEL = 'ai.clipper.metric.config' + # NOTE: we use '_' as the delimiter because kubernetes allows the use # '_' in labels but not in deployment names. We force model names and # versions to be compliant with both limitations, so this gives us an extra @@ -36,7 +46,11 @@ def parse_model_container_label(label): return splits -def find_unbound_port(start=None, increment=True, port_range=(34256, 50000), verbose=False, logger=None): +def find_unbound_port(start=None, + increment=True, + port_range=(34256, 50000), + verbose=False, + logger=None): """ Fina a unbound port. @@ -67,7 +81,8 @@ def find_unbound_port(start=None, increment=True, port_range=(34256, 50000), ver if verbose and logger: logger.info("Socket error: {}".format(e)) logger.info( - "randomly generated port %d is bound. Trying again." % start) + "randomly generated port %d is bound. Trying again." % + start) if increment: start += 1 diff --git a/clipper_admin/clipper_admin/docker/docker_container_manager.py b/clipper_admin/clipper_admin/docker/docker_container_manager.py index 4411bd0eb..af91c437f 100644 --- a/clipper_admin/clipper_admin/docker/docker_container_manager.py +++ b/clipper_admin/clipper_admin/docker/docker_container_manager.py @@ -8,12 +8,13 @@ import json import tempfile from ..container_manager import ( - create_model_container_label, parse_model_container_label, find_unbound_port, - ContainerManager, CLIPPER_DOCKER_LABEL, CLIPPER_MODEL_CONTAINER_LABEL, - CLIPPER_QUERY_FRONTEND_CONTAINER_LABEL, + create_model_container_label, parse_model_container_label, + find_unbound_port, ContainerManager, CLIPPER_DOCKER_LABEL, + CLIPPER_MODEL_CONTAINER_LABEL, CLIPPER_QUERY_FRONTEND_CONTAINER_LABEL, CLIPPER_MGMT_FRONTEND_CONTAINER_LABEL, CLIPPER_INTERNAL_RPC_PORT, CLIPPER_INTERNAL_QUERY_PORT, CLIPPER_INTERNAL_MANAGEMENT_PORT, - CLIPPER_INTERNAL_METRIC_PORT) + CLIPPER_INTERNAL_METRIC_PORT, CLIPPER_INTERNAL_REDIS_PORT, + CLIPPER_DOCKER_PORT_LABELS, CLIPPER_METRIC_CONFIG_LABEL) from ..exceptions import ClipperException from requests.exceptions import ConnectionError from .docker_metric_utils import * @@ -88,7 +89,9 @@ def __init__(self, # Merge Clipper-specific labels with any user-provided labels if "labels" in self.extra_container_kwargs: self.common_labels = self.extra_container_kwargs.pop("labels") - self.common_labels.update({CLIPPER_DOCKER_LABEL: self.cluster_name}) + self.common_labels.update({ + CLIPPER_DOCKER_LABEL: self.cluster_name + }) else: self.common_labels = {CLIPPER_DOCKER_LABEL: self.cluster_name} @@ -99,19 +102,17 @@ def __init__(self, self.extra_container_kwargs.update(container_args) - self.prom_config_path = tempfile.NamedTemporaryFile('w', suffix='.yml', delete=False).name - self.prom_config_path = os.path.realpath(self.prom_config_path) # resolve symlink - def start_clipper(self, query_frontend_image, mgmt_frontend_image, cache_size, + prometheus_version, num_frontend_replicas=1): if num_frontend_replicas != 1: - msg = "Docker container manager's query frontend scale-out \ - hasn't been implemented. Please set num_frontend_replicas=1 \ - or use Kubernetes." - + msg = "Docker container manager's query frontend scale-out " \ + "hasn't been implemented. You can contribute to Clipper at " \ + "https://github.com/ucbrise/clipper." \ + "Please set num_frontend_replicas=1 or use Kubernetes." raise ClipperException(msg) try: @@ -124,25 +125,43 @@ def start_clipper(self, msg = "Unable to Connect to Docker. Please Check if Docker is running." raise ClipperException(msg) + containers_in_cluster = self.docker_client.containers.list( + filters={ + 'label': + ['ai.clipper.container.label={}'.format(self.cluster_name)] + }) + if len(containers_in_cluster) > 0: + raise ClipperException( + "Cluster {} cannot be started because it already exists. " + "Please use clipper_conn.connect() to connect to it.".format( + self.cluster_name)) if not self.external_redis: logger.info("Starting managed Redis instance in Docker") self.redis_port = find_unbound_port(self.redis_port) + redis_labels = self.common_labels.copy() + redis_labels[CLIPPER_DOCKER_PORT_LABELS['redis']] = str( + self.redis_port) redis_container = self.docker_client.containers.run( 'redis:alpine', - "redis-server --port %s" % self.redis_port, + "redis-server --port %s" % CLIPPER_INTERNAL_REDIS_PORT, name="redis-{}".format(random.randint( 0, 100000)), # generate a random name - ports={'%s/tcp' % self.redis_port: self.redis_port}, - labels=self.common_labels.copy(), + ports={ + '%s/tcp' % CLIPPER_INTERNAL_REDIS_PORT: self.redis_port + }, + labels=redis_labels, **self.extra_container_kwargs) self.redis_ip = redis_container.name mgmt_cmd = "--redis_ip={redis_ip} --redis_port={redis_port}".format( - redis_ip=self.redis_ip, redis_port=self.redis_port) + redis_ip=self.redis_ip, redis_port=CLIPPER_INTERNAL_REDIS_PORT) + self.clipper_management_port = find_unbound_port( + self.clipper_management_port) mgmt_labels = self.common_labels.copy() mgmt_labels[CLIPPER_MGMT_FRONTEND_CONTAINER_LABEL] = "" - self.clipper_management_port = find_unbound_port(self.clipper_management_port) + mgmt_labels[CLIPPER_DOCKER_PORT_LABELS['management']] = str( + self.clipper_management_port) self.docker_client.containers.run( mgmt_frontend_image, mgmt_cmd, @@ -158,14 +177,19 @@ def start_clipper(self, query_cmd = ("--redis_ip={redis_ip} --redis_port={redis_port} " "--prediction_cache_size={cache_size}").format( redis_ip=self.redis_ip, - redis_port=self.redis_port, + redis_port=CLIPPER_INTERNAL_REDIS_PORT, cache_size=cache_size) - query_labels = self.common_labels.copy() - query_labels[CLIPPER_QUERY_FRONTEND_CONTAINER_LABEL] = "" + query_container_id = random.randint(0, 100000) query_name = "query_frontend-{}".format(query_container_id) self.clipper_query_port = find_unbound_port(self.clipper_query_port) self.clipper_rpc_port = find_unbound_port(self.clipper_rpc_port) + query_labels = self.common_labels.copy() + query_labels[CLIPPER_QUERY_FRONTEND_CONTAINER_LABEL] = "" + query_labels[CLIPPER_DOCKER_PORT_LABELS['query_query']] = str( + self.clipper_query_port) + query_labels[CLIPPER_DOCKER_PORT_LABELS['query_rpc']] = str( + self.clipper_rpc_port) self.docker_client.containers.run( query_frontend_image, query_cmd, @@ -184,21 +208,49 @@ def start_clipper(self, run_query_frontend_metric_image( query_frontend_metric_name, self.docker_client, query_name, self.common_labels, self.extra_container_kwargs) - setup_metric_config(query_frontend_metric_name, - self.prom_config_path, + + self.prom_config_path = tempfile.NamedTemporaryFile( + 'w', suffix='.yml', delete=False).name + self.prom_config_path = os.path.realpath( + self.prom_config_path) # resolve symlink + setup_metric_config(query_frontend_metric_name, self.prom_config_path, CLIPPER_INTERNAL_METRIC_PORT) + self.prometheus_port = find_unbound_port(self.prometheus_port) - run_metric_image(self.docker_client, - self.common_labels, - self.prometheus_port, - self.prom_config_path, + metric_labels = self.common_labels.copy() + metric_labels[CLIPPER_DOCKER_PORT_LABELS['metric']] = str( + self.prometheus_port) + metric_labels[CLIPPER_METRIC_CONFIG_LABEL] = self.prom_config_path + run_metric_image(self.docker_client, metric_labels, + self.prometheus_port, self.prom_config_path, self.extra_container_kwargs) self.connect() def connect(self): - # No extra connection steps to take on connection - return + """ + Use the cluster name to update ports. Because they might not match as in + start_clipper the ports might be changed. + :return: None + """ + containers = self.docker_client.containers.list( + filters={ + 'label': + ['ai.clipper.container.label={}'.format(self.cluster_name)] + }) + all_labels = {} + for container in containers: + all_labels.update(container.labels) + + self.redis_port = all_labels[CLIPPER_DOCKER_PORT_LABELS['redis']] + self.clipper_management_port = all_labels[CLIPPER_DOCKER_PORT_LABELS[ + 'management']] + self.clipper_query_port = all_labels[CLIPPER_DOCKER_PORT_LABELS[ + 'query_query']] + self.clipper_rpc_port = all_labels[CLIPPER_DOCKER_PORT_LABELS[ + 'query_rpc']] + self.prometheus_port = all_labels[CLIPPER_DOCKER_PORT_LABELS['metric']] + self.prom_config_path = all_labels[CLIPPER_METRIC_CONFIG_LABEL] def deploy_model(self, name, version, input_type, image, num_replicas=1): # Parameters @@ -213,8 +265,9 @@ def deploy_model(self, name, version, input_type, image, num_replicas=1): def _get_replicas(self, name, version): containers = self.docker_client.containers.list( filters={ - "label":[ - "{key}={val}".format(key=CLIPPER_DOCKER_LABEL, val=self.cluster_name), + "label": [ + "{key}={val}".format( + key=CLIPPER_DOCKER_LABEL, val=self.cluster_name), "{key}={val}".format( key=CLIPPER_MODEL_CONTAINER_LABEL, val=create_model_container_label(name, version)) @@ -230,7 +283,8 @@ def _add_replica(self, name, version, input_type, image): containers = self.docker_client.containers.list( filters={ "label": [ - "{key}={val}".format(key=CLIPPER_DOCKER_LABEL, val=self.cluster_name), + "{key}={val}".format( + key=CLIPPER_DOCKER_LABEL, val=self.cluster_name), CLIPPER_QUERY_FRONTEND_CONTAINER_LABEL ] }) @@ -262,8 +316,7 @@ def _add_replica(self, name, version, input_type, image): **self.extra_container_kwargs) # Metric Section - add_to_metric_config(model_container_name, - self.prom_config_path, + add_to_metric_config(model_container_name, self.prom_config_path, self.prometheus_port, CLIPPER_INTERNAL_METRIC_PORT) @@ -306,14 +359,16 @@ def set_num_replicas(self, name, version, input_type, image, num_replicas): cur_container = current_replicas.pop() cur_container.stop() # Metric Section - delete_from_metric_config(cur_container.name, self.prom_config_path, self.prometheus_port) + delete_from_metric_config(cur_container.name, + self.prom_config_path, + self.prometheus_port) def get_logs(self, logging_dir): containers = self.docker_client.containers.list( filters={ - "label": "{key}={val}".format( - key=CLIPPER_DOCKER_LABEL, - val=self.cluster_name) + "label": + "{key}={val}".format( + key=CLIPPER_DOCKER_LABEL, val=self.cluster_name) }) logging_dir = os.path.abspath(os.path.expanduser(logging_dir)) @@ -356,9 +411,9 @@ def stop_all_model_containers(self): def stop_all(self): containers = self.docker_client.containers.list( filters={ - "label": "{key}={val}".format( - key=CLIPPER_DOCKER_LABEL, - val=self.cluster_name) + "label": + "{key}={val}".format( + key=CLIPPER_DOCKER_LABEL, val=self.cluster_name) }) for c in containers: c.stop() diff --git a/clipper_admin/clipper_admin/docker/docker_metric_utils.py b/clipper_admin/clipper_admin/docker/docker_metric_utils.py index 6c096143f..b83817088 100644 --- a/clipper_admin/clipper_admin/docker/docker_metric_utils.py +++ b/clipper_admin/clipper_admin/docker/docker_metric_utils.py @@ -43,8 +43,7 @@ def run_query_frontend_metric_image(name, docker_client, query_name, **extra_container_kwargs) -def setup_metric_config(query_frontend_metric_name, - prom_config_path, +def setup_metric_config(query_frontend_metric_name, prom_config_path, CLIPPER_INTERNAL_METRIC_PORT): """ Write to file prometheus.yml after frontend-metric is setup. @@ -107,7 +106,8 @@ def run_metric_image(docker_client, common_labels, prometheus_port, **extra_container_kwargs) -def add_to_metric_config(model_container_name, prom_config_path, prometheus_port, CLIPPER_INTERNAL_METRIC_PORT): +def add_to_metric_config(model_container_name, prom_config_path, + prometheus_port, CLIPPER_INTERNAL_METRIC_PORT): """ Add a new model container to the prometheus.yml configuration file. :param model_container_name: New model container name, need to be unique. @@ -144,10 +144,12 @@ def add_to_metric_config(model_container_name, prom_config_path, prometheus_port with open(prom_config_path, 'w') as f: yaml.dump(conf, f) - requests.post('http://localhost:{prometheus_port}/-/reload'.format(prometheus_port=prometheus_port)) + requests.post('http://localhost:{prometheus_port}/-/reload'.format( + prometheus_port=prometheus_port)) -def delete_from_metric_config(model_container_name, prom_config_path, prometheus_port): +def delete_from_metric_config(model_container_name, prom_config_path, + prometheus_port): """ Delete the stored model container from the prometheus.yml configuration file. :param model_container_name: the model container name to be deleted. @@ -164,4 +166,5 @@ def delete_from_metric_config(model_container_name, prom_config_path, prometheus with open(prom_config_path, 'w') as f: yaml.dump(conf, f) - requests.post('http://localhost:{prometheus_port}/-/reload'.format(prometheus_port=prometheus_port)) + requests.post('http://localhost:{prometheus_port}/-/reload'.format( + prometheus_port=prometheus_port)) diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index 87db6cffd..be9d5cd78 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -206,8 +206,8 @@ def _start_prometheus(self): configmap_body["metadata"]["labels"] with _pass_conflicts(): - self._k8s_v1.create_namespaced_config_map(body=configmap_body, namespace=self.namespace) - + self._k8s_v1.create_namespaced_config_map( + body=configmap_body, namespace=self.namespace) def connect(self): nodes = self._k8s_v1.list_node() diff --git a/integration-tests/multi_tenancy_docker.py b/integration-tests/multi_tenancy_docker.py index ed8345058..5f152fade 100644 --- a/integration-tests/multi_tenancy_docker.py +++ b/integration-tests/multi_tenancy_docker.py @@ -10,4 +10,3 @@ import logging from test_utils import (create_docker_connection, BenchmarkException, fake_model_data, headers, log_clipper_state) - From 2778c489b6515d7ee03461cd45e857ebe3346374 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Mon, 28 May 2018 17:59:17 -0700 Subject: [PATCH 07/63] Add templating to config yamls; add cluster_name --- .../clipper_admin/container_manager.py | 4 + .../docker/docker_container_manager.py | 4 + .../kubernetes_container_manager.py | 315 +++++++++--------- .../kubernetes/kubernetes_metric_utils.py | 62 ---- .../kubernetes/mgmt-frontend-deployment.yaml | 12 +- .../kubernetes/mgmt-frontend-service.yaml | 4 +- .../kubernetes/model-container-template.yaml | 45 +++ .../kubernetes/prom_configmap.yaml | 4 +- .../kubernetes/prom_deployment.yaml | 10 +- .../kubernetes/prom_service.yaml | 4 +- .../kubernetes/query-frontend-deployment.yaml | 21 +- .../query-frontend-rpc-service.yaml | 6 +- .../kubernetes/query-frontend-service.yaml | 4 +- .../kubernetes/redis-deployment.yaml | 6 +- .../kubernetes/redis-service.yaml | 6 +- clipper_admin/setup.py | 3 +- 16 files changed, 248 insertions(+), 262 deletions(-) create mode 100644 clipper_admin/clipper_admin/kubernetes/model-container-template.yaml diff --git a/clipper_admin/clipper_admin/container_manager.py b/clipper_admin/clipper_admin/container_manager.py index 9b0702d83..1402af183 100644 --- a/clipper_admin/clipper_admin/container_manager.py +++ b/clipper_admin/clipper_admin/container_manager.py @@ -170,3 +170,7 @@ def get_admin_addr(self): @abc.abstractmethod def get_query_addr(self): return + + @abc.abstractmethod + def get_metric_addr(self): + return diff --git a/clipper_admin/clipper_admin/docker/docker_container_manager.py b/clipper_admin/clipper_admin/docker/docker_container_manager.py index af91c437f..70924501b 100644 --- a/clipper_admin/clipper_admin/docker/docker_container_manager.py +++ b/clipper_admin/clipper_admin/docker/docker_container_manager.py @@ -425,3 +425,7 @@ def get_admin_addr(self): def get_query_addr(self): return "{host}:{port}".format( host=self.public_hostname, port=self.clipper_query_port) + + def get_metric_addr(self): + return "{host}:{port}".format( + host=self.public_hostname, port=self.prometheus_port) diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index be9d5cd78..ebfbd6e91 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -5,7 +5,7 @@ CLIPPER_INTERNAL_MANAGEMENT_PORT, CLIPPER_INTERNAL_QUERY_PORT, CLIPPER_INTERNAL_METRIC_PORT) from ..exceptions import ClipperException -# from .kubernetes_metric_utils import start_prometheus, CLIPPER_FRONTEND_EXPORTER_IMAGE +from .kubernetes_metric_utils import PROM_VERSION, CLIPPER_FRONTEND_EXPORTER_IMAGE from contextlib import contextmanager from kubernetes import client, config @@ -16,11 +16,37 @@ import yaml import os import time +import jinja2 CLIPPER_QUERY_FRONTEND_DEPLOYMENT_LABEL = "ai.clipper.name=query-frontend" logger = logging.getLogger(__name__) cur_dir = os.path.dirname(os.path.abspath(__file__)) +CONFIG_FILES = { + 'redis':{ + 'service': 'redis-service.yaml', + 'deployment': 'redis-deployment.yaml' + }, + 'management':{ + 'service': 'mgmt-frontend-service.yaml', + 'deployment': 'mgmt-frontend-deployment.yaml' + }, + 'query':{ + 'service': { + 'query': 'query-frontend-service.yaml', + 'rpc': 'query-frontend-rpc-service.yaml' + }, + 'deployment': 'query-frontend-deployment.yaml', + }, + 'metric':{ + 'service': 'prom_service.yaml', + 'deployment':'prom_deployment.yaml', + 'config': 'prom_configmap.yaml' + }, + 'model': { + 'deployment': 'model-container-template.yaml' + } +} @contextmanager @@ -97,117 +123,138 @@ def __init__(self, self._k8s_beta = client.ExtensionsV1beta1Api() self.namespace = kubernetes_namespace + # Create the template engine + # Config: Any variable missing -> Error + self.template_engine = jinja2.Environment( + loader=jinja2.FileSystemLoader(cur_dir, followlinks=True), + undefined=jinja2.StrictUndefined) + def start_clipper(self, query_frontend_image, mgmt_frontend_image, cache_size, num_frontend_replicas=1): + self._start_redis() + self._start_mgmt(mgmt_frontend_image) self.num_frontend_replicas = num_frontend_replicas + self._start_query(query_frontend_image, cache_size, num_frontend_replicas) + self._start_prometheus() + self.connect() + def _start_redis(self, sleep_time=5): # If an existing Redis service isn't provided, start one if self.redis_ip is None: - name = 'redis' with _pass_conflicts(): self._k8s_beta.create_namespaced_deployment( - body=yaml.load( - open( - os.path.join(cur_dir, - '{}-deployment.yaml'.format(name)))), - namespace='default') + body=self._generate_config( + CONFIG_FILES['redis']['deployment'], + cluster_name=self.cluster_name + ), + namespace=self.namespace) with _pass_conflicts(): - body = yaml.load( - open( - os.path.join(cur_dir, '{}-service.yaml'.format(name)))) - body["spec"]["ports"][0]["port"] = self.redis_port + body = self._generate_config( + CONFIG_FILES['redis']['service'], + public_redis_port=self.redis_port, + cluster_name=self.cluster_name + ) self._k8s_v1.create_namespaced_service( - body=body, namespace='default') - time.sleep(10) + body=body, namespace=self.namespace) + time.sleep(sleep_time) - for name, img in zip(['mgmt-frontend', 'query-frontend'], - [mgmt_frontend_image, query_frontend_image]): + self.redis_ip = 'redis-at-{cluster_name}'.format(cluster_name=self.cluster_name) + + + def _start_mgmt(self, mgmt_image): + with _pass_conflicts(): + mgmt_depolyment_data = self._generate_config( + CONFIG_FILES['management']['deployment'], + image=mgmt_image, + redis_service_host=self.redis_ip, + redis_service_port=self.redis_port, + cluster_name=self.cluster_name + ) + self._k8s_beta.create_namespaced_deployment( + body=mgmt_depolyment_data, + namespace='default') + + with _pass_conflicts(): + mgmt_service_data = self._generate_config( + CONFIG_FILES['management']['service'], + cluster_name=self.cluster_name + ) + self._k8s_v1.create_namespaced_service( + body=mgmt_service_data, namespace=self.namespace) + + def _start_query(self, query_image, cache_size, num_replicas): + for query_frontend_id in range(num_replicas): with _pass_conflicts(): - body = yaml.load( - open( - os.path.join(cur_dir, - '{}-deployment.yaml'.format(name)))) - if self.redis_ip is not None: - args = [ - "--redis_ip={}".format(self.redis_ip), - "--redis_port={}".format(self.redis_port) - ] - if name is 'query-frontend': - args.append( - "--prediction_cache_size={}".format(cache_size)) - body["spec"]["template"]["spec"]["containers"][0][ - "args"] = args - body["spec"]["template"]["spec"]["containers"][0][ - "image"] = img - - if name is 'query-frontend': - # Create multiple query frontend - for query_frontend_id in range(num_frontend_replicas): - # Create single query frontend depolyment - body['metadata']['name'] = 'query-frontend-{}'.format( - query_frontend_id) - body['spec']['template']['metadata']['labels'][ - CLIPPER_QUERY_FRONTEND_ID_LABEL] = str( - query_frontend_id) - - body['spec']['template']['spec']['containers'][1][ - 'image'] = CLIPPER_FRONTEND_EXPORTER_IMAGE - - self._k8s_beta.create_namespaced_deployment( - body=body, namespace='default') - - # Create single query frontend rpc service - # Don't confuse this with query frontend service, which - # is created after the loop as a single user facing - # service. - rpc_service_body = yaml.load( - open( - os.path.join( - cur_dir, - '{}-rpc-service.yaml'.format(name)))) - rpc_service_body['metadata'][ - 'name'] = 'query-frontend-{}'.format( - query_frontend_id) - rpc_service_body['spec']['selector'][ - CLIPPER_QUERY_FRONTEND_ID_LABEL] = str( - query_frontend_id) - - self._k8s_v1.create_namespaced_service( - body=rpc_service_body, namespace='default') - - else: - self._k8s_beta.create_namespaced_deployment( - body=body, namespace='default') + query_deployment_data = self._generate_config( + CONFIG_FILES['query']['deployment'], + image=query_image, + exporter_image = CLIPPER_FRONTEND_EXPORTER_IMAGE, + redis_service_host=self.redis_ip, + redis_service_port=self.redis_port, + cache_size = cache_size, + name='query-frontend-{}'.format(query_frontend_id), + id_label = str(query_frontend_id), + cluster_name=self.cluster_name + ) + self._k8s_beta.create_namespaced_deployment( + body=query_deployment_data, + namespace=self.namespace) with _pass_conflicts(): - body = yaml.load( - open( - os.path.join(cur_dir, '{}-service.yaml'.format(name)))) + query_rpc_service_data = self._generate_config( + CONFIG_FILES['query']['service']['rpc'], + name='query-frontend-{}'.format(query_frontend_id), + id_label=str(query_frontend_id), + cluster_name=self.cluster_name + ) self._k8s_v1.create_namespaced_service( - body=body, namespace='default') + body=query_rpc_service_data, namespace=self.namespace) - self._start_prometheus() + with _pass_conflicts(): + query_frontend_service_data = self._generate_config( + CONFIG_FILES['query']['service']['query'], + cluster_name=self.cluster_name + ) + self._k8s_v1.create_namespaced_service( + body=query_frontend_service_data, namespace=self.namespace) - self.connect() def _start_prometheus(self): - prom_deployment_path = os.path.join(cur_dir, 'prom_deployment.yaml') - prom_service_path = os.path.join(cur_dir, 'prom_service.yaml') - prom_configmap_path = os.path.join(cur_dir, 'prom_configmap.yaml') - frontend_exporter_deployment_path = os.path.join( - cur_dir, 'frontend-exporter-deployment.yaml') + with _pass_conflicts(): + configmap_data = self._generate_config( + CONFIG_FILES['metric']['config'], + cluster_name=self.cluster_name + ) + self._k8s_v1.create_namespaced_config_map( + body=configmap_data, namespace=self.namespace) - with open(prom_configmap_path, 'r') as f: - configmap_body = yaml.load(f) - configmap_body["metadata"]["labels"] + with _pass_conflicts(): + deployment_data = self._generate_config( + CONFIG_FILES['metric']['deployment'], + version=PROM_VERSION, + cluster_name=self.cluster_name, + ) + self._k8s_beta.create_namespaced_deployment( + body=deployment_data, namespace=self.namespace) with _pass_conflicts(): - self._k8s_v1.create_namespaced_config_map( - body=configmap_body, namespace=self.namespace) + service_data = self._generate_config( + CONFIG_FILES['metric']['service'], + cluster_name=self.cluster_name, + ) + self._k8s_v1.create_namespaced_service( + body=service_data, namespace=self.namespace) + + def _generate_config(self, file_path, **kwargs): + template = self.template_engine.get_template(file_path) + rendered = template.render(**kwargs) + parsed = yaml.load(rendered) + return parsed + def connect(self): nodes = self._k8s_v1.list_node() @@ -237,7 +284,7 @@ def connect(self): try: mgmt_frontend_ports = self._k8s_v1.read_namespaced_service( - name="mgmt-frontend", namespace='default').spec.ports + name="mgmt-frontend-at-{cluster_name}".format(cluster_name=self.cluster_name), namespace='default').spec.ports for p in mgmt_frontend_ports: if p.name == "1338": self.clipper_management_port = p.node_port @@ -245,7 +292,7 @@ def connect(self): self.clipper_management_port)) query_frontend_ports = self._k8s_v1.read_namespaced_service( - name="query-frontend", namespace='default').spec.ports + name="query-frontend-at-{cluster_name}".format(cluster_name=self.cluster_name), namespace='default').spec.ports for p in query_frontend_ports: if p.name == "1337": self.clipper_query_port = p.node_port @@ -260,7 +307,7 @@ def connect(self): self.num_frontend_replicas = len(query_frontend_deployments) metrics_ports = self._k8s_v1.read_namespaced_service( - name="metrics", namespace='default').spec.ports + name="metrics-at-{cluster_name}".format(cluster_name=self.cluster_name), namespace='default').spec.ports for p in metrics_ports: if p.name == "9090": self.clipper_metric_port = p.node_port @@ -279,77 +326,23 @@ def deploy_model(self, name, version, input_type, image, num_replicas=1): with _pass_conflicts(): for query_frontend_id in range(self.num_frontend_replicas): deployment_name = get_model_deployment_name( - name, version, query_frontend_id) - body = { - 'apiVersion': 'extensions/v1beta1', - 'kind': 'Deployment', - 'metadata': { - "name": deployment_name, - "label": { - "test": "readiness" - }, - }, - 'spec': { - 'replicas': num_replicas, - 'template': { - 'metadata': { - 'labels': { - CLIPPER_MODEL_CONTAINER_LABEL: - create_model_container_label( - name, version), - CLIPPER_DOCKER_LABEL: - "" - }, - 'annotations': { - "prometheus.io/scrape": "true", - "prometheus.io/port": "1390", - "test": "readiness", - } - }, - 'spec': { - 'containers': [{ - 'name': - deployment_name, - 'image': - image, - 'imagePullPolicy': - 'Always', - 'readinessProbe': { - 'exec': { - 'command': - ['cat', '/model_is_ready.check'] - }, - 'initialDelaySeconds': 3, - 'periodSeconds': 3 - }, - 'ports': [{ - 'containerPort': 80 - }, { - 'containerPort': 1390 - }], - 'env': [{ - 'name': 'CLIPPER_MODEL_NAME', - 'value': name - }, { - 'name': 'CLIPPER_MODEL_VERSION', - 'value': str(version) - }, { - 'name': - 'CLIPPER_IP', - 'value': - 'query-frontend-{}'.format( - query_frontend_id) - }, { - 'name': 'CLIPPER_INPUT_TYPE', - 'value': input_type - }] - }] - } - } - } - } + name, version, query_frontend_id, self.cluster_name) + + generated_body = self._generate_config( + CONFIG_FILES['model']['deployment'], + deployment_name=deployment_name, + num_replicas=num_replicas, + container_label=create_model_container_label(name, version), + model_name=name, + version=version, + query_frontend_id=query_frontend_id, + input_type=input_type, + image=image, + cluster_name=self.cluster_name + ) + self._k8s_beta.create_namespaced_deployment( - body=body, namespace='default') + body=generated_body, namespace='default') while self._k8s_beta.read_namespaced_deployment_status( name=deployment_name, namespace='default').status.available_replicas \ @@ -514,6 +507,6 @@ def get_metric_addr(self): port=self.clipper_metric_port) -def get_model_deployment_name(name, version, query_frontend_id): +def get_model_deployment_name(name, version, query_frontend_id, cluster_name): return "{name}-{version}-deployment-at-{query_frontend_id}".format( - name=name, version=version, query_frontend_id=query_frontend_id) + name=name, version=version, query_frontend_id=query_frontend_id, cluster_name=cluster_name) diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_metric_utils.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_metric_utils.py index ff9dac2ae..f3c70d14c 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_metric_utils.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_metric_utils.py @@ -1,67 +1,5 @@ -import yaml -import os -from kubernetes.client import V1ConfigMap -from contextlib import contextmanager -from kubernetes.client.rest import ApiException -import logging -import json from ..version import __version__ -_cur_dir = os.path.dirname(os.path.abspath(__file__)) -prom_deployment_path = os.path.join(_cur_dir, 'prom_deployment.yaml') -prom_service_path = os.path.join(_cur_dir, 'prom_service.yaml') -prom_configmap_path = os.path.join(_cur_dir, 'prom_configmap.yaml') -frontend_exporter_deployment_path = os.path.join( - _cur_dir, 'frontend-exporter-deployment.yaml') - -logger = logging.getLogger(__name__) - PROM_VERSION = "v2.1.0" CLIPPER_FRONTEND_EXPORTER_IMAGE = "clipper/frontend-exporter:{}".format( __version__) - - -@contextmanager -def _pass_conflicts(): - try: - yield - except ApiException as e: - body = json.loads(e.body) - if body['reason'] == 'AlreadyExists': - logger.info("{} already exists, skipping!".format(body['details'])) - pass - else: - raise e - - -def _create_prometheus_configmap(_k8s_v1, namespace): - with open(prom_configmap_path, 'r') as f: - data = yaml.load(f) - - with _pass_conflicts(): - _k8s_v1.create_namespaced_config_map(body=data, namespace=namespace) - - -def _create_prometheus_deployment(_k8s_beta, namespace): - with open(prom_deployment_path, 'r') as f: - data = yaml.load(f) - - data['spec']['template']['spec']['containers'][0][ - 'image'] = "prom/prometheus:{version}".format(version=PROM_VERSION) - - with _pass_conflicts(): - _k8s_beta.create_namespaced_deployment(body=data, namespace=namespace) - - -def _create_prometheus_service(_k8s_v1, namespace): - with open(prom_service_path, 'r') as f: - data = yaml.load(f) - - with _pass_conflicts(): - _k8s_v1.create_namespaced_service(body=data, namespace=namespace) - - -def start_prometheus(_k8s_v1, _k8s_beta, namespace): - _create_prometheus_configmap(_k8s_v1, namespace) - _create_prometheus_deployment(_k8s_beta, namespace) - _create_prometheus_service(_k8s_v1, namespace) diff --git a/clipper_admin/clipper_admin/kubernetes/mgmt-frontend-deployment.yaml b/clipper_admin/clipper_admin/kubernetes/mgmt-frontend-deployment.yaml index 07987d51b..a3016df31 100644 --- a/clipper_admin/clipper_admin/kubernetes/mgmt-frontend-deployment.yaml +++ b/clipper_admin/clipper_admin/kubernetes/mgmt-frontend-deployment.yaml @@ -2,22 +2,22 @@ apiVersion: extensions/v1beta1 kind: Deployment metadata: labels: - ai.clipper.container.label: "" + ai.clipper.container.label: {{ cluster_name }} ai.clipper.name: mgmt-frontend - name: mgmt-frontend + name: mgmt-frontend-at-{{ cluster_name }} spec: replicas: 1 template: metadata: labels: - ai.clipper.container.label: "" + ai.clipper.container.label: {{ cluster_name }} ai.clipper.name: mgmt-frontend spec: containers: - args: - - "--redis_ip=$(REDIS_SERVICE_HOST)" - - "--redis_port=$(REDIS_SERVICE_PORT)" - image: clipper/management_frontend + - "--redis_ip={{ redis_service_host | default('$(REDIS_SERVICE_HOST)', true) }}" # If redis_service_host == None, default to env var + - "--redis_port={{ redis_service_port | default('$(REDIS_SERVICE_PORT)', true) }}" + image: {{ image }} imagePullPolicy: Always name: mgmt-frontend ports: diff --git a/clipper_admin/clipper_admin/kubernetes/mgmt-frontend-service.yaml b/clipper_admin/clipper_admin/kubernetes/mgmt-frontend-service.yaml index 4cf5fa239..1f5b3290c 100644 --- a/clipper_admin/clipper_admin/kubernetes/mgmt-frontend-service.yaml +++ b/clipper_admin/clipper_admin/kubernetes/mgmt-frontend-service.yaml @@ -2,9 +2,9 @@ apiVersion: v1 kind: Service metadata: labels: - ai.clipper.container.label: "" + ai.clipper.container.label: {{ cluster_name }} ai.clipper.name: mgmt-frontend - name: mgmt-frontend + name: mgmt-frontend-at-{{ cluster_name }} spec: type: NodePort ports: diff --git a/clipper_admin/clipper_admin/kubernetes/model-container-template.yaml b/clipper_admin/clipper_admin/kubernetes/model-container-template.yaml new file mode 100644 index 000000000..9b9cb7faf --- /dev/null +++ b/clipper_admin/clipper_admin/kubernetes/model-container-template.yaml @@ -0,0 +1,45 @@ +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + label: + test: readiness + ai.clipper.container.label: {{ cluster_name }} + ai.clipper.model_container.label: {{ container_label }} + ai.clipper.name: model + name: {{ deployment_name }} # Cluster name included +spec: + replicas: {{ num_replicas }} + template: + metadata: + annotations: + prometheus.io/port: "1390" + prometheus.io/scrape: "true" + test: readiness + labels: + ai.clipper.container.label: {{ cluster_name }} + ai.clipper.model_container.label: {{ container_label }} + ai.clipper.name: model + spec: + containers: + - env: + - name: CLIPPER_MODEL_NAME + value: {{ model_name }} + - name: CLIPPER_MODEL_VERSION + value: "{{ version }}" + - name: CLIPPER_IP + value: query-frontend-{{ query_frontend_id }} + - name: CLIPPER_INPUT_TYPE + value: {{ input_type }} + image: {{ image }} + imagePullPolicy: Always + name: {{ deployment_name }} + ports: + - containerPort: 80 + - containerPort: 1390 + readinessProbe: + exec: + command: + - cat + - /model_is_ready.check + initialDelaySeconds: 3 + periodSeconds: 3 diff --git a/clipper_admin/clipper_admin/kubernetes/prom_configmap.yaml b/clipper_admin/clipper_admin/kubernetes/prom_configmap.yaml index da7bae88d..253d482dd 100644 --- a/clipper_admin/clipper_admin/kubernetes/prom_configmap.yaml +++ b/clipper_admin/clipper_admin/kubernetes/prom_configmap.yaml @@ -29,7 +29,7 @@ data: kind: ConfigMap metadata: - name: prometheus-config + name: prometheus-config-at-{{ cluster_name }} labels: - ai.clipper.container.label: "" + ai.clipper.container.label: {{ cluster_name }} ai.clipper.name: metrics \ No newline at end of file diff --git a/clipper_admin/clipper_admin/kubernetes/prom_deployment.yaml b/clipper_admin/clipper_admin/kubernetes/prom_deployment.yaml index db834f0bd..dbc404678 100644 --- a/clipper_admin/clipper_admin/kubernetes/prom_deployment.yaml +++ b/clipper_admin/clipper_admin/kubernetes/prom_deployment.yaml @@ -2,19 +2,19 @@ apiVersion: extensions/v1beta1 kind: Deployment metadata: labels: - ai.clipper.container.label: "" + ai.clipper.container.label: {{ cluster_name }} ai.clipper.name: metrics - name: metrics + name: metrics-at-{{ cluster_name }} spec: replicas: 1 template: metadata: labels: - ai.clipper.container.label: "" + ai.clipper.container.label: {{ cluster_name }} ai.clipper.name: metrics spec: containers: - - image: prom/prometheus #The version will be appended in kubernetes_metric_utils + - image: prom/prometheus:{{ version }} args: - "--config.file=/etc/prometheus/prometheus.yml" - "--storage.tsdb.path=/prometheus" @@ -30,7 +30,7 @@ spec: volumes: - name: config-volume configMap: - name: prometheus-config #This name needs to match the configmap created in the same namespace + name: prometheus-config-at-{{ cluster_name }} #This name needs to match the configmap created in the same namespace items: - key: prom_config.yml #The name of the "data" field inside configmap path: prometheus.yml #Write the a file called prometheus.yml inside mountPath diff --git a/clipper_admin/clipper_admin/kubernetes/prom_service.yaml b/clipper_admin/clipper_admin/kubernetes/prom_service.yaml index 381092e93..00b490509 100644 --- a/clipper_admin/clipper_admin/kubernetes/prom_service.yaml +++ b/clipper_admin/clipper_admin/kubernetes/prom_service.yaml @@ -2,9 +2,9 @@ apiVersion: v1 kind: Service metadata: labels: - ai.clipper.container.label: "" + ai.clipper.container.label: {{ cluster_name }} ai.clipper.name: metrics - name: metrics + name: metrics-at-{{ cluster_name }} spec: type: NodePort ports: diff --git a/clipper_admin/clipper_admin/kubernetes/query-frontend-deployment.yaml b/clipper_admin/clipper_admin/kubernetes/query-frontend-deployment.yaml index 0b3aaf66d..13021e863 100644 --- a/clipper_admin/clipper_admin/kubernetes/query-frontend-deployment.yaml +++ b/clipper_admin/clipper_admin/kubernetes/query-frontend-deployment.yaml @@ -4,9 +4,9 @@ apiVersion: extensions/v1beta1 kind: Deployment metadata: labels: - ai.clipper.container.label: "" + ai.clipper.container.label: {{ cluster_name }} ai.clipper.name: query-frontend - name: null # Name must to be set in container_manager + name: {{ name }}-at-{{ cluster_name }} spec: replicas: 1 template: @@ -15,15 +15,16 @@ spec: "prometheus.io/scrape": "true" "prometheus.io/port": "1390" labels: - ai.clipper.container.label: "" + ai.clipper.container.label: {{ cluster_name }} ai.clipper.name: query-frontend - ai.clipper.query_frontend.id: null # This value must be set in container_manager + ai.clipper.query_frontend.id: "{{ id_label }}" spec: containers: - - args: - - "--redis_ip=$(REDIS_SERVICE_HOST)" - - "--redis_port=$(REDIS_SERVICE_PORT)" - image: clipper/query_frontend + - args: # If redis_service_host == None, default to env var + - "--redis_ip={{ redis_service_host | default('$(REDIS_SERVICE_HOST)', true) }}" + - "--redis_port={{ redis_service_port | default('$(REDIS_SERVICE_PORT)', true) }}" + - "--prediction_cache_size={{ cache_size }}" + image: {{ image }} imagePullPolicy: Always name: query-frontend ports: @@ -31,8 +32,8 @@ spec: - containerPort: 1337 - args: - "--query_frontend_name" - - "query-frontend:1337" - image: "clipper/frontend-exporter" + - "query-frontend-at-{{ cluster_name }}:1337" + image: {{ exporter_image }} name: frontend-exporter ports: - containerPort: 1390 diff --git a/clipper_admin/clipper_admin/kubernetes/query-frontend-rpc-service.yaml b/clipper_admin/clipper_admin/kubernetes/query-frontend-rpc-service.yaml index 43b22c20b..8e2b1fead 100644 --- a/clipper_admin/clipper_admin/kubernetes/query-frontend-rpc-service.yaml +++ b/clipper_admin/clipper_admin/kubernetes/query-frontend-rpc-service.yaml @@ -2,9 +2,9 @@ apiVersion: v1 kind: Service metadata: labels: - ai.clipper.container.label: "" + ai.clipper.container.label: {{ cluster_name }} ai.clipper.name: query-frontend - name: null # This value must be set in container_manager + name: {{ name }}-at-{{ cluster_name }} spec: type: NodePort ports: @@ -13,4 +13,4 @@ spec: targetPort: 7000 selector: ai.clipper.name: query-frontend - ai.clipper.query_frontend.id: null # This value must be set in container_manager + ai.clipper.query_frontend.id: "{{ id_label }}" diff --git a/clipper_admin/clipper_admin/kubernetes/query-frontend-service.yaml b/clipper_admin/clipper_admin/kubernetes/query-frontend-service.yaml index 678625d73..8b75ade28 100644 --- a/clipper_admin/clipper_admin/kubernetes/query-frontend-service.yaml +++ b/clipper_admin/clipper_admin/kubernetes/query-frontend-service.yaml @@ -2,9 +2,9 @@ apiVersion: v1 kind: Service metadata: labels: - ai.clipper.container.label: "" + ai.clipper.container.label: {{ cluster_name }} ai.clipper.name: query-frontend - name: query-frontend + name: query-frontend-at-{{ cluster_name }} spec: type: NodePort ports: diff --git a/clipper_admin/clipper_admin/kubernetes/redis-deployment.yaml b/clipper_admin/clipper_admin/kubernetes/redis-deployment.yaml index cc7bce16c..046d95f56 100644 --- a/clipper_admin/clipper_admin/kubernetes/redis-deployment.yaml +++ b/clipper_admin/clipper_admin/kubernetes/redis-deployment.yaml @@ -2,15 +2,15 @@ apiVersion: extensions/v1beta1 kind: Deployment metadata: labels: - ai.clipper.container.label: "" + ai.clipper.container.label: {{ cluster_name }} ai.clipper.name: redis - name: redis + name: redis-at-{{ cluster_name }} spec: replicas: 1 template: metadata: labels: - ai.clipper.container.label: "" + ai.clipper.container.label: {{ cluster_name }} ai.clipper.name: redis spec: containers: diff --git a/clipper_admin/clipper_admin/kubernetes/redis-service.yaml b/clipper_admin/clipper_admin/kubernetes/redis-service.yaml index 30941c9cc..0a3491eca 100644 --- a/clipper_admin/clipper_admin/kubernetes/redis-service.yaml +++ b/clipper_admin/clipper_admin/kubernetes/redis-service.yaml @@ -2,14 +2,14 @@ apiVersion: v1 kind: Service metadata: labels: - ai.clipper.container.label: "" + ai.clipper.container.label: {{ cluster_name }} ai.clipper.name: redis - name: redis + name: redis-at-{{ cluster_name }} spec: type: NodePort ports: - name: "6379" - port: 6379 + port: {{ public_redis_port }} targetPort: 6379 selector: ai.clipper.name: redis diff --git a/clipper_admin/setup.py b/clipper_admin/setup.py index 9a9b5325c..10b74d76d 100644 --- a/clipper_admin/setup.py +++ b/clipper_admin/setup.py @@ -28,7 +28,8 @@ install_requires=[ 'requests', 'numpy', 'subprocess32; python_version<"3"', 'pyyaml', 'docker', 'kubernetes>=6.0.0', 'prometheus_client', 'cloudpickle>=0.5', - 'enum34; python_version<"3.4"', 'redis', 'psutil', 'jsonschema' + 'enum34; python_version<"3.4"', 'redis', 'psutil', 'jsonschema', + 'jinja2' ], extras_require={ 'PySpark': ['pyspark'], From 5cd53a7e7f5c0046fb30adb71a62b34d1e30a11f Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 29 May 2018 12:53:22 -0700 Subject: [PATCH 08/63] Seperation of service done; need check conn --- clipper_admin/clipper_admin/container_manager.py | 1 + .../kubernetes/kubernetes_container_manager.py | 8 +++----- .../clipper_admin/kubernetes/mgmt-frontend-service.yaml | 1 + .../kubernetes/model-container-template.yaml | 2 +- .../clipper_admin/kubernetes/prom_configmap.yaml | 3 +++ .../kubernetes/query-frontend-rpc-service.yaml | 1 + .../clipper_admin/kubernetes/query-frontend-service.yaml | 1 + clipper_admin/clipper_admin/kubernetes/redis-service.yaml | 1 + 8 files changed, 12 insertions(+), 6 deletions(-) diff --git a/clipper_admin/clipper_admin/container_manager.py b/clipper_admin/clipper_admin/container_manager.py index 1402af183..52e1e8735 100644 --- a/clipper_admin/clipper_admin/container_manager.py +++ b/clipper_admin/clipper_admin/container_manager.py @@ -11,6 +11,7 @@ CLIPPER_INTERNAL_REDIS_PORT = 6379 CLIPPER_DOCKER_LABEL = "ai.clipper.container.label" +CLIPPER_NAME_LABEL = "ai.clipper.name" CLIPPER_MODEL_CONTAINER_LABEL = "ai.clipper.model_container.label" CLIPPER_QUERY_FRONTEND_CONTAINER_LABEL = "ai.clipper.query_frontend.label" CLIPPER_MGMT_FRONTEND_CONTAINER_LABEL = "ai.clipper.management_frontend.label" diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index ebfbd6e91..c21620998 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -3,7 +3,7 @@ create_model_container_label, ContainerManager, CLIPPER_DOCKER_LABEL, CLIPPER_MODEL_CONTAINER_LABEL, CLIPPER_QUERY_FRONTEND_ID_LABEL, CLIPPER_INTERNAL_MANAGEMENT_PORT, CLIPPER_INTERNAL_QUERY_PORT, - CLIPPER_INTERNAL_METRIC_PORT) + CLIPPER_INTERNAL_METRIC_PORT, CLIPPER_NAME_LABEL) from ..exceptions import ClipperException from .kubernetes_metric_utils import PROM_VERSION, CLIPPER_FRONTEND_EXPORTER_IMAGE @@ -18,8 +18,6 @@ import time import jinja2 -CLIPPER_QUERY_FRONTEND_DEPLOYMENT_LABEL = "ai.clipper.name=query-frontend" - logger = logging.getLogger(__name__) cur_dir = os.path.dirname(os.path.abspath(__file__)) CONFIG_FILES = { @@ -303,7 +301,7 @@ def connect(self): query_frontend_deployments = self._k8s_beta.list_namespaced_deployment( namespace="default", - label_selector=CLIPPER_QUERY_FRONTEND_DEPLOYMENT_LABEL).items + label_selector="{name_label}=query-frontend, {cluster_label}={cluster_name}".format(name_label=CLIPPER_NAME_LABEL, cluster_label=CLIPPER_DOCKER_LABEL, cluster_name=self.cluster_name)).items self.num_frontend_replicas = len(query_frontend_deployments) metrics_ports = self._k8s_v1.read_namespaced_service( @@ -508,5 +506,5 @@ def get_metric_addr(self): def get_model_deployment_name(name, version, query_frontend_id, cluster_name): - return "{name}-{version}-deployment-at-{query_frontend_id}".format( + return "{name}-{version}-deployment-at-{query_frontend_id}-at-{cluster_name}".format( name=name, version=version, query_frontend_id=query_frontend_id, cluster_name=cluster_name) diff --git a/clipper_admin/clipper_admin/kubernetes/mgmt-frontend-service.yaml b/clipper_admin/clipper_admin/kubernetes/mgmt-frontend-service.yaml index 1f5b3290c..710c70e20 100644 --- a/clipper_admin/clipper_admin/kubernetes/mgmt-frontend-service.yaml +++ b/clipper_admin/clipper_admin/kubernetes/mgmt-frontend-service.yaml @@ -13,3 +13,4 @@ spec: targetPort: 1338 selector: ai.clipper.name: mgmt-frontend + ai.clipper.container.label: {{ cluster_name }} diff --git a/clipper_admin/clipper_admin/kubernetes/model-container-template.yaml b/clipper_admin/clipper_admin/kubernetes/model-container-template.yaml index 9b9cb7faf..8610ff478 100644 --- a/clipper_admin/clipper_admin/kubernetes/model-container-template.yaml +++ b/clipper_admin/clipper_admin/kubernetes/model-container-template.yaml @@ -27,7 +27,7 @@ spec: - name: CLIPPER_MODEL_VERSION value: "{{ version }}" - name: CLIPPER_IP - value: query-frontend-{{ query_frontend_id }} + value: query-frontend-{{ query_frontend_id }}-at-{{ cluster_name }} - name: CLIPPER_INPUT_TYPE value: {{ input_type }} image: {{ image }} diff --git a/clipper_admin/clipper_admin/kubernetes/prom_configmap.yaml b/clipper_admin/clipper_admin/kubernetes/prom_configmap.yaml index 253d482dd..466355608 100644 --- a/clipper_admin/clipper_admin/kubernetes/prom_configmap.yaml +++ b/clipper_admin/clipper_admin/kubernetes/prom_configmap.yaml @@ -13,6 +13,9 @@ data: - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] action: keep regex: true + - source_labels: [__meta_kubernetes_pod_label_ai_clipper_container_label] + action: keep + regex: {{cluster_name}} - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] action: replace regex: ([^:]+)(?::\d+)?;(\d+) diff --git a/clipper_admin/clipper_admin/kubernetes/query-frontend-rpc-service.yaml b/clipper_admin/clipper_admin/kubernetes/query-frontend-rpc-service.yaml index 8e2b1fead..dee9cef77 100644 --- a/clipper_admin/clipper_admin/kubernetes/query-frontend-rpc-service.yaml +++ b/clipper_admin/clipper_admin/kubernetes/query-frontend-rpc-service.yaml @@ -14,3 +14,4 @@ spec: selector: ai.clipper.name: query-frontend ai.clipper.query_frontend.id: "{{ id_label }}" + ai.clipper.container.label: {{ cluster_name }} diff --git a/clipper_admin/clipper_admin/kubernetes/query-frontend-service.yaml b/clipper_admin/clipper_admin/kubernetes/query-frontend-service.yaml index 8b75ade28..2644719e5 100644 --- a/clipper_admin/clipper_admin/kubernetes/query-frontend-service.yaml +++ b/clipper_admin/clipper_admin/kubernetes/query-frontend-service.yaml @@ -13,3 +13,4 @@ spec: targetPort: 1337 selector: ai.clipper.name: query-frontend + ai.clipper.container.label: {{ cluster_name }} diff --git a/clipper_admin/clipper_admin/kubernetes/redis-service.yaml b/clipper_admin/clipper_admin/kubernetes/redis-service.yaml index 0a3491eca..11fe8528c 100644 --- a/clipper_admin/clipper_admin/kubernetes/redis-service.yaml +++ b/clipper_admin/clipper_admin/kubernetes/redis-service.yaml @@ -13,3 +13,4 @@ spec: targetPort: 6379 selector: ai.clipper.name: redis + ai.clipper.container.label: {{ cluster_name }} From a51eb617aa00b449414a7a1c4b873e48965ee6f7 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 29 May 2018 13:05:24 -0700 Subject: [PATCH 09/63] Format Code --- .../kubernetes_container_manager.py | 130 +++++++++--------- 1 file changed, 66 insertions(+), 64 deletions(-) diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index c21620998..4ca5a592f 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -21,24 +21,24 @@ logger = logging.getLogger(__name__) cur_dir = os.path.dirname(os.path.abspath(__file__)) CONFIG_FILES = { - 'redis':{ - 'service': 'redis-service.yaml', - 'deployment': 'redis-deployment.yaml' + 'redis': { + 'service': 'redis-service.yaml', + 'deployment': 'redis-deployment.yaml' }, - 'management':{ - 'service': 'mgmt-frontend-service.yaml', - 'deployment': 'mgmt-frontend-deployment.yaml' + 'management': { + 'service': 'mgmt-frontend-service.yaml', + 'deployment': 'mgmt-frontend-deployment.yaml' }, - 'query':{ + 'query': { 'service': { 'query': 'query-frontend-service.yaml', - 'rpc': 'query-frontend-rpc-service.yaml' + 'rpc': 'query-frontend-rpc-service.yaml' }, 'deployment': 'query-frontend-deployment.yaml', }, - 'metric':{ + 'metric': { 'service': 'prom_service.yaml', - 'deployment':'prom_deployment.yaml', + 'deployment': 'prom_deployment.yaml', 'config': 'prom_configmap.yaml' }, 'model': { @@ -135,7 +135,8 @@ def start_clipper(self, self._start_redis() self._start_mgmt(mgmt_frontend_image) self.num_frontend_replicas = num_frontend_replicas - self._start_query(query_frontend_image, cache_size, num_frontend_replicas) + self._start_query(query_frontend_image, cache_size, + num_frontend_replicas) self._start_prometheus() self.connect() @@ -146,22 +147,20 @@ def _start_redis(self, sleep_time=5): self._k8s_beta.create_namespaced_deployment( body=self._generate_config( CONFIG_FILES['redis']['deployment'], - cluster_name=self.cluster_name - ), + cluster_name=self.cluster_name), namespace=self.namespace) with _pass_conflicts(): body = self._generate_config( CONFIG_FILES['redis']['service'], public_redis_port=self.redis_port, - cluster_name=self.cluster_name - ) + cluster_name=self.cluster_name) self._k8s_v1.create_namespaced_service( body=body, namespace=self.namespace) time.sleep(sleep_time) - self.redis_ip = 'redis-at-{cluster_name}'.format(cluster_name=self.cluster_name) - + self.redis_ip = 'redis-at-{cluster_name}'.format( + cluster_name=self.cluster_name) def _start_mgmt(self, mgmt_image): with _pass_conflicts(): @@ -170,17 +169,14 @@ def _start_mgmt(self, mgmt_image): image=mgmt_image, redis_service_host=self.redis_ip, redis_service_port=self.redis_port, - cluster_name=self.cluster_name - ) + cluster_name=self.cluster_name) self._k8s_beta.create_namespaced_deployment( - body=mgmt_depolyment_data, - namespace='default') + body=mgmt_depolyment_data, namespace='default') with _pass_conflicts(): mgmt_service_data = self._generate_config( CONFIG_FILES['management']['service'], - cluster_name=self.cluster_name - ) + cluster_name=self.cluster_name) self._k8s_v1.create_namespaced_service( body=mgmt_service_data, namespace=self.namespace) @@ -190,43 +186,37 @@ def _start_query(self, query_image, cache_size, num_replicas): query_deployment_data = self._generate_config( CONFIG_FILES['query']['deployment'], image=query_image, - exporter_image = CLIPPER_FRONTEND_EXPORTER_IMAGE, + exporter_image=CLIPPER_FRONTEND_EXPORTER_IMAGE, redis_service_host=self.redis_ip, redis_service_port=self.redis_port, - cache_size = cache_size, + cache_size=cache_size, name='query-frontend-{}'.format(query_frontend_id), - id_label = str(query_frontend_id), - cluster_name=self.cluster_name - ) + id_label=str(query_frontend_id), + cluster_name=self.cluster_name) self._k8s_beta.create_namespaced_deployment( - body=query_deployment_data, - namespace=self.namespace) + body=query_deployment_data, namespace=self.namespace) with _pass_conflicts(): query_rpc_service_data = self._generate_config( CONFIG_FILES['query']['service']['rpc'], name='query-frontend-{}'.format(query_frontend_id), id_label=str(query_frontend_id), - cluster_name=self.cluster_name - ) + cluster_name=self.cluster_name) self._k8s_v1.create_namespaced_service( body=query_rpc_service_data, namespace=self.namespace) with _pass_conflicts(): query_frontend_service_data = self._generate_config( CONFIG_FILES['query']['service']['query'], - cluster_name=self.cluster_name - ) + cluster_name=self.cluster_name) self._k8s_v1.create_namespaced_service( body=query_frontend_service_data, namespace=self.namespace) - def _start_prometheus(self): with _pass_conflicts(): configmap_data = self._generate_config( CONFIG_FILES['metric']['config'], - cluster_name=self.cluster_name - ) + cluster_name=self.cluster_name) self._k8s_v1.create_namespaced_config_map( body=configmap_data, namespace=self.namespace) @@ -253,7 +243,6 @@ def _generate_config(self, file_path, **kwargs): parsed = yaml.load(rendered) return parsed - def connect(self): nodes = self._k8s_v1.list_node() @@ -282,7 +271,9 @@ def connect(self): try: mgmt_frontend_ports = self._k8s_v1.read_namespaced_service( - name="mgmt-frontend-at-{cluster_name}".format(cluster_name=self.cluster_name), namespace='default').spec.ports + name="mgmt-frontend-at-{cluster_name}".format( + cluster_name=self.cluster_name), + namespace='default').spec.ports for p in mgmt_frontend_ports: if p.name == "1338": self.clipper_management_port = p.node_port @@ -290,7 +281,9 @@ def connect(self): self.clipper_management_port)) query_frontend_ports = self._k8s_v1.read_namespaced_service( - name="query-frontend-at-{cluster_name}".format(cluster_name=self.cluster_name), namespace='default').spec.ports + name="query-frontend-at-{cluster_name}".format( + cluster_name=self.cluster_name), + namespace='default').spec.ports for p in query_frontend_ports: if p.name == "1337": self.clipper_query_port = p.node_port @@ -301,11 +294,18 @@ def connect(self): query_frontend_deployments = self._k8s_beta.list_namespaced_deployment( namespace="default", - label_selector="{name_label}=query-frontend, {cluster_label}={cluster_name}".format(name_label=CLIPPER_NAME_LABEL, cluster_label=CLIPPER_DOCKER_LABEL, cluster_name=self.cluster_name)).items + label_selector= + "{name_label}=query-frontend, {cluster_label}={cluster_name}". + format( + name_label=CLIPPER_NAME_LABEL, + cluster_label=CLIPPER_DOCKER_LABEL, + cluster_name=self.cluster_name)).items self.num_frontend_replicas = len(query_frontend_deployments) metrics_ports = self._k8s_v1.read_namespaced_service( - name="metrics-at-{cluster_name}".format(cluster_name=self.cluster_name), namespace='default').spec.ports + name="metrics-at-{cluster_name}".format( + cluster_name=self.cluster_name), + namespace='default').spec.ports for p in metrics_ports: if p.name == "9090": self.clipper_metric_port = p.node_port @@ -321,31 +321,30 @@ def connect(self): "Reason: {}".format(e)) def deploy_model(self, name, version, input_type, image, num_replicas=1): - with _pass_conflicts(): - for query_frontend_id in range(self.num_frontend_replicas): - deployment_name = get_model_deployment_name( - name, version, query_frontend_id, self.cluster_name) - - generated_body = self._generate_config( - CONFIG_FILES['model']['deployment'], - deployment_name=deployment_name, - num_replicas=num_replicas, - container_label=create_model_container_label(name, version), - model_name=name, - version=version, - query_frontend_id=query_frontend_id, - input_type=input_type, - image=image, - cluster_name=self.cluster_name - ) + for query_frontend_id in range(self.num_frontend_replicas): + deployment_name = get_model_deployment_name( + name, version, query_frontend_id, self.cluster_name) + + generated_body = self._generate_config( + CONFIG_FILES['model']['deployment'], + deployment_name=deployment_name, + num_replicas=num_replicas, + container_label=create_model_container_label(name, version), + model_name=name, + version=version, + query_frontend_id=query_frontend_id, + input_type=input_type, + image=image, + cluster_name=self.cluster_name) + with _pass_conflicts(): self._k8s_beta.create_namespaced_deployment( body=generated_body, namespace='default') - while self._k8s_beta.read_namespaced_deployment_status( - name=deployment_name, namespace='default').status.available_replicas \ - != num_replicas: - time.sleep(3) + while self._k8s_beta.read_namespaced_deployment_status( + name=deployment_name, namespace='default').status.available_replicas \ + != num_replicas: + time.sleep(3) def get_num_replicas(self, name, version): deployment_name = get_model_deployment_name( @@ -507,4 +506,7 @@ def get_metric_addr(self): def get_model_deployment_name(name, version, query_frontend_id, cluster_name): return "{name}-{version}-deployment-at-{query_frontend_id}-at-{cluster_name}".format( - name=name, version=version, query_frontend_id=query_frontend_id, cluster_name=cluster_name) + name=name, + version=version, + query_frontend_id=query_frontend_id, + cluster_name=cluster_name) From 33189bfa9b6bdda9bde2fb01eb8d81ff9775b8d4 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 29 May 2018 15:21:58 -0700 Subject: [PATCH 10/63] Add tests, fix label selector --- bin/run_unittests.sh | 1 + .../docker/docker_container_manager.py | 12 +++- .../kubernetes_container_manager.py | 34 +++++---- integration-tests/multi_tenancy_docker.py | 12 ---- integration-tests/multi_tenency_test.py | 71 +++++++++++++++++++ integration-tests/test_utils.py | 11 +-- 6 files changed, 108 insertions(+), 33 deletions(-) delete mode 100644 integration-tests/multi_tenancy_docker.py create mode 100644 integration-tests/multi_tenency_test.py diff --git a/bin/run_unittests.sh b/bin/run_unittests.sh index 214094d04..b4be710f6 100755 --- a/bin/run_unittests.sh +++ b/bin/run_unittests.sh @@ -144,6 +144,7 @@ function run_integration_tests { ../integration-tests/r_integration_test/rclipper_test.sh python ../integration-tests/clipper_metric_docker.py python ../integration-tests/clipper_metric_kube.py + python ../integration-tests/multi_tenency_test.py echo "Exit code: $?" echo "GREPTHIS Done running unit tests" } diff --git a/clipper_admin/clipper_admin/docker/docker_container_manager.py b/clipper_admin/clipper_admin/docker/docker_container_manager.py index 70924501b..574419bcf 100644 --- a/clipper_admin/clipper_admin/docker/docker_container_manager.py +++ b/clipper_admin/clipper_admin/docker/docker_container_manager.py @@ -24,7 +24,7 @@ class DockerContainerManager(ContainerManager): def __init__(self, - cluster_name, + cluster_name="default-cluster", docker_ip_address="localhost", clipper_query_port=1337, clipper_management_port=1338, @@ -305,6 +305,7 @@ def _add_replica(self, name, version, input_type, image): model_container_label = create_model_container_label(name, version) labels = self.common_labels.copy() labels[CLIPPER_MODEL_CONTAINER_LABEL] = model_container_label + labels[CLIPPER_DOCKER_LABEL] = self.cluster_name model_container_name = model_container_label + '-{}'.format( random.randint(0, 100000)) @@ -392,7 +393,9 @@ def get_logs(self, logging_dir): def stop_models(self, models): containers = self.docker_client.containers.list( filters={ - "label": CLIPPER_MODEL_CONTAINER_LABEL + "label": [ + CLIPPER_MODEL_CONTAINER_LABEL, + "{key}={val}".format(key=CLIPPER_DOCKER_LABEL, val=self.cluster_name)] }) for c in containers: c_name, c_version = parse_model_container_label( @@ -403,7 +406,10 @@ def stop_models(self, models): def stop_all_model_containers(self): containers = self.docker_client.containers.list( filters={ - "label": CLIPPER_MODEL_CONTAINER_LABEL + "label": [ + CLIPPER_MODEL_CONTAINER_LABEL, + "{key}={val}".format(key=CLIPPER_DOCKER_LABEL, val=self.cluster_name) + ] }) for c in containers: c.stop() diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index 4ca5a592f..9dda54995 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -62,7 +62,7 @@ def _pass_conflicts(): class KubernetesContainerManager(ContainerManager): def __init__(self, - cluster_name, + cluster_name="default-cluster", kubernetes_proxy_addr=None, redis_ip=None, redis_port=6379, @@ -411,9 +411,12 @@ def stop_models(self, models): for v in models[m]: self._k8s_beta.delete_collection_namespaced_deployment( namespace='default', - label_selector="{label}:{val}".format( + label_selector="{label}={val}, {cluster_label}={cluster_name}".format( label=CLIPPER_MODEL_CONTAINER_LABEL, - val=create_model_container_label(m, v))) + val=create_model_container_label(m, v), + cluster_label=CLIPPER_DOCKER_LABEL, + cluster_name=self.cluster_name + )) except ApiException as e: logger.warn( "Exception deleting kubernetes deployments: {}".format(e)) @@ -423,7 +426,11 @@ def stop_all_model_containers(self): try: self._k8s_beta.delete_collection_namespaced_deployment( namespace='default', - label_selector=CLIPPER_MODEL_CONTAINER_LABEL) + label_selector="{label}, {cluster_label}={cluster_name}".format( + label=CLIPPER_MODEL_CONTAINER_LABEL, + cluster_label=CLIPPER_DOCKER_LABEL, + cluster_name=self.cluster_name + )) except ApiException as e: logger.warn( "Exception deleting kubernetes deployments: {}".format(e)) @@ -432,10 +439,13 @@ def stop_all_model_containers(self): def stop_all(self): logger.info("Stopping all running Clipper resources") + cluster_selecter = "{cluster_label}={cluster_name}".format(cluster_label=CLIPPER_DOCKER_LABEL, + cluster_name=self.cluster_name) + try: for service in self._k8s_v1.list_namespaced_service( namespace='default', - label_selector=CLIPPER_DOCKER_LABEL).items: + label_selector=cluster_selecter).items: service_name = service.metadata.name self._k8s_v1.delete_namespaced_service( namespace='default', @@ -443,23 +453,19 @@ def stop_all(self): body=V1DeleteOptions()) self._k8s_beta.delete_collection_namespaced_deployment( - namespace='default', label_selector=CLIPPER_DOCKER_LABEL) + namespace='default', label_selector=cluster_selecter) self._k8s_beta.delete_collection_namespaced_replica_set( - namespace='default', label_selector=CLIPPER_DOCKER_LABEL) + namespace='default', label_selector=cluster_selecter) self._k8s_v1.delete_collection_namespaced_replication_controller( - namespace='default', label_selector=CLIPPER_DOCKER_LABEL) + namespace='default', label_selector=cluster_selecter) self._k8s_v1.delete_collection_namespaced_pod( - namespace='default', label_selector=CLIPPER_DOCKER_LABEL) - - self._k8s_v1.delete_collection_namespaced_pod( - namespace='default', - label_selector=CLIPPER_MODEL_CONTAINER_LABEL) + namespace='default', label_selector=cluster_selecter) self._k8s_v1.delete_collection_namespaced_config_map( - namespace='default', label_selector=CLIPPER_DOCKER_LABEL) + namespace='default', label_selector=cluster_selecter) except ApiException as e: logging.warn( "Exception deleting kubernetes resources: {}".format(e)) diff --git a/integration-tests/multi_tenancy_docker.py b/integration-tests/multi_tenancy_docker.py deleted file mode 100644 index 5f152fade..000000000 --- a/integration-tests/multi_tenancy_docker.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import absolute_import, division, print_function -import os -import sys -import requests -import json -import tempfile -import shutil -import numpy as np -import time -import logging -from test_utils import (create_docker_connection, BenchmarkException, - fake_model_data, headers, log_clipper_state) diff --git a/integration-tests/multi_tenency_test.py b/integration-tests/multi_tenency_test.py new file mode 100644 index 000000000..5045024c9 --- /dev/null +++ b/integration-tests/multi_tenency_test.py @@ -0,0 +1,71 @@ +from clipper_admin import ClipperConnection, DockerContainerManager, KubernetesContainerManager +from clipper_admin.deployers import python as python_deployer + +import signal +import sys +import json +import requests +from datetime import datetime +import os +import time +from test_utils import create_kubernetes_connection, create_docker_connection +import click + +@click.command() +@click.option('--kubernetes', is_flag=True) +def test(kubernetes): + conn_1 = create('cluster-1', use_kubernetes=kubernetes) + conn_2 = create('cluster-2', use_kubernetes=kubernetes) + + deploy_(conn_1, use_kubernetes=kubernetes) + deploy_(conn_2, use_kubernetes=kubernetes) + + res_1 = predict_(conn_1.get_query_addr(), [.1,.2,.3]) + res_2 = predict_(conn_2.get_query_addr(), [.1,.2,.3]) + assert not res_1['default'] + assert not res_2['default'] + + conn_1.stop_all() + conn_2.stop_all() + + +def create(name, use_kubernetes=False): + if use_kubernetes: + conn = create_kubernetes_connection( + cleanup=True, start_clipper=True, name=name) + else: + conn = create_docker_connection( + cleanup=True, start_clipper=True, name=name) + return conn + +def feature_sum(xs): + return [str(sum(x)) for x in xs] + +def deploy_(clipper_conn, use_kubernetes=False): + if use_kubernetes: + python_deployer.create_endpoint(clipper_conn, "simple-example", "doubles", + feature_sum, + registry= + "568959175238.dkr.ecr.us-west-1.amazonaws.com/clipper") + else: + python_deployer.create_endpoint(clipper_conn, "simple-example", "doubles", + feature_sum) + +def predict_(addr, x, batch=False): + url = "http://%s/simple-example/predict" % addr + + if batch: + req_json = json.dumps({'input_batch': x}) + else: + req_json = json.dumps({'input': list(x)}) + + headers = {'Content-type': 'application/json'} + start = datetime.now() + r = requests.post(url, headers=headers, data=req_json) + end = datetime.now() + latency = (end - start).total_seconds() * 1000.0 + print("'%s', %f ms" % (r.text, latency)) + return r.json() + +if __name__ == '__main__': + test() diff --git a/integration-tests/test_utils.py b/integration-tests/test_utils.py index ddc8ebab7..50cec259f 100644 --- a/integration-tests/test_utils.py +++ b/integration-tests/test_utils.py @@ -62,9 +62,10 @@ def find_unbound_port(): "randomly generated port %d is bound. Trying again." % port) -def create_docker_connection(cleanup=True, start_clipper=True): +def create_docker_connection(cleanup=True, start_clipper=True, name='default-cluster'): logger.info("Creating DockerContainerManager") cm = DockerContainerManager( + cluster_name=name, clipper_query_port=find_unbound_port(), clipper_management_port=find_unbound_port(), clipper_rpc_port=find_unbound_port(), @@ -88,6 +89,7 @@ def create_docker_connection(cleanup=True, start_clipper=True): "Problem starting Clipper: {}\nTrying again.".format(e)) cl.stop_all() cm = DockerContainerManager( + cluster_name=name, clipper_query_port=find_unbound_port(), clipper_management_port=find_unbound_port(), clipper_rpc_port=find_unbound_port(), @@ -102,12 +104,13 @@ def create_kubernetes_connection(cleanup=True, start_clipper=True, connect=True, with_proxy=False, - num_frontend_replicas=1): + num_frontend_replicas=1, + name='default-cluster'): logger.info("Creating KubernetesContainerManager") if with_proxy: - cm = KubernetesContainerManager(kubernetes_proxy_addr="127.0.0.1:8080") + cm = KubernetesContainerManager(cluster_name=name,kubernetes_proxy_addr="127.0.0.1:8080") else: - cm = KubernetesContainerManager() + cm = KubernetesContainerManager(cluster_name=name) cl = ClipperConnection(cm) if cleanup: cl.stop_all() From a2683ad3d47e050504791f9c1cc2c14731177c08 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 29 May 2018 15:25:29 -0700 Subject: [PATCH 11/63] Format code --- .../docker/docker_container_manager.py | 9 ++++---- .../kubernetes_container_manager.py | 16 ++++++------- integration-tests/multi_tenency_test.py | 23 ++++++++++++------- integration-tests/test_utils.py | 7 ++++-- 4 files changed, 33 insertions(+), 22 deletions(-) diff --git a/clipper_admin/clipper_admin/docker/docker_container_manager.py b/clipper_admin/clipper_admin/docker/docker_container_manager.py index 574419bcf..fdaf387dc 100644 --- a/clipper_admin/clipper_admin/docker/docker_container_manager.py +++ b/clipper_admin/clipper_admin/docker/docker_container_manager.py @@ -394,8 +394,9 @@ def stop_models(self, models): containers = self.docker_client.containers.list( filters={ "label": [ - CLIPPER_MODEL_CONTAINER_LABEL, - "{key}={val}".format(key=CLIPPER_DOCKER_LABEL, val=self.cluster_name)] + CLIPPER_MODEL_CONTAINER_LABEL, "{key}={val}".format( + key=CLIPPER_DOCKER_LABEL, val=self.cluster_name) + ] }) for c in containers: c_name, c_version = parse_model_container_label( @@ -407,8 +408,8 @@ def stop_all_model_containers(self): containers = self.docker_client.containers.list( filters={ "label": [ - CLIPPER_MODEL_CONTAINER_LABEL, - "{key}={val}".format(key=CLIPPER_DOCKER_LABEL, val=self.cluster_name) + CLIPPER_MODEL_CONTAINER_LABEL, "{key}={val}".format( + key=CLIPPER_DOCKER_LABEL, val=self.cluster_name) ] }) for c in containers: diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index 9dda54995..82cc7dbe5 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -411,12 +411,12 @@ def stop_models(self, models): for v in models[m]: self._k8s_beta.delete_collection_namespaced_deployment( namespace='default', - label_selector="{label}={val}, {cluster_label}={cluster_name}".format( + label_selector= + "{label}={val}, {cluster_label}={cluster_name}".format( label=CLIPPER_MODEL_CONTAINER_LABEL, val=create_model_container_label(m, v), cluster_label=CLIPPER_DOCKER_LABEL, - cluster_name=self.cluster_name - )) + cluster_name=self.cluster_name)) except ApiException as e: logger.warn( "Exception deleting kubernetes deployments: {}".format(e)) @@ -426,11 +426,11 @@ def stop_all_model_containers(self): try: self._k8s_beta.delete_collection_namespaced_deployment( namespace='default', - label_selector="{label}, {cluster_label}={cluster_name}".format( + label_selector="{label}, {cluster_label}={cluster_name}". + format( label=CLIPPER_MODEL_CONTAINER_LABEL, cluster_label=CLIPPER_DOCKER_LABEL, - cluster_name=self.cluster_name - )) + cluster_name=self.cluster_name)) except ApiException as e: logger.warn( "Exception deleting kubernetes deployments: {}".format(e)) @@ -439,8 +439,8 @@ def stop_all_model_containers(self): def stop_all(self): logger.info("Stopping all running Clipper resources") - cluster_selecter = "{cluster_label}={cluster_name}".format(cluster_label=CLIPPER_DOCKER_LABEL, - cluster_name=self.cluster_name) + cluster_selecter = "{cluster_label}={cluster_name}".format( + cluster_label=CLIPPER_DOCKER_LABEL, cluster_name=self.cluster_name) try: for service in self._k8s_v1.list_namespaced_service( diff --git a/integration-tests/multi_tenency_test.py b/integration-tests/multi_tenency_test.py index 5045024c9..f7142b9ab 100644 --- a/integration-tests/multi_tenency_test.py +++ b/integration-tests/multi_tenency_test.py @@ -11,6 +11,7 @@ from test_utils import create_kubernetes_connection, create_docker_connection import click + @click.command() @click.option('--kubernetes', is_flag=True) def test(kubernetes): @@ -20,8 +21,8 @@ def test(kubernetes): deploy_(conn_1, use_kubernetes=kubernetes) deploy_(conn_2, use_kubernetes=kubernetes) - res_1 = predict_(conn_1.get_query_addr(), [.1,.2,.3]) - res_2 = predict_(conn_2.get_query_addr(), [.1,.2,.3]) + res_1 = predict_(conn_1.get_query_addr(), [.1, .2, .3]) + res_2 = predict_(conn_2.get_query_addr(), [.1, .2, .3]) assert not res_1['default'] assert not res_2['default'] @@ -38,18 +39,23 @@ def create(name, use_kubernetes=False): cleanup=True, start_clipper=True, name=name) return conn + def feature_sum(xs): return [str(sum(x)) for x in xs] + def deploy_(clipper_conn, use_kubernetes=False): if use_kubernetes: - python_deployer.create_endpoint(clipper_conn, "simple-example", "doubles", - feature_sum, - registry= - "568959175238.dkr.ecr.us-west-1.amazonaws.com/clipper") + python_deployer.create_endpoint( + clipper_conn, + "simple-example", + "doubles", + feature_sum, + registry="568959175238.dkr.ecr.us-west-1.amazonaws.com/clipper") else: - python_deployer.create_endpoint(clipper_conn, "simple-example", "doubles", - feature_sum) + python_deployer.create_endpoint(clipper_conn, "simple-example", + "doubles", feature_sum) + def predict_(addr, x, batch=False): url = "http://%s/simple-example/predict" % addr @@ -67,5 +73,6 @@ def predict_(addr, x, batch=False): print("'%s', %f ms" % (r.text, latency)) return r.json() + if __name__ == '__main__': test() diff --git a/integration-tests/test_utils.py b/integration-tests/test_utils.py index 50cec259f..644ccc351 100644 --- a/integration-tests/test_utils.py +++ b/integration-tests/test_utils.py @@ -62,7 +62,9 @@ def find_unbound_port(): "randomly generated port %d is bound. Trying again." % port) -def create_docker_connection(cleanup=True, start_clipper=True, name='default-cluster'): +def create_docker_connection(cleanup=True, + start_clipper=True, + name='default-cluster'): logger.info("Creating DockerContainerManager") cm = DockerContainerManager( cluster_name=name, @@ -108,7 +110,8 @@ def create_kubernetes_connection(cleanup=True, name='default-cluster'): logger.info("Creating KubernetesContainerManager") if with_proxy: - cm = KubernetesContainerManager(cluster_name=name,kubernetes_proxy_addr="127.0.0.1:8080") + cm = KubernetesContainerManager( + cluster_name=name, kubernetes_proxy_addr="127.0.0.1:8080") else: cm = KubernetesContainerManager(cluster_name=name) cl = ClipperConnection(cm) From dad441b808f0adb1a94687393d417e63c7d8050c Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 29 May 2018 17:35:33 -0700 Subject: [PATCH 12/63] Don't connect after cleanup --- integration-tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-tests/test_utils.py b/integration-tests/test_utils.py index 644ccc351..69d7c721a 100644 --- a/integration-tests/test_utils.py +++ b/integration-tests/test_utils.py @@ -97,7 +97,7 @@ def create_docker_connection(cleanup=True, clipper_rpc_port=find_unbound_port(), redis_port=find_unbound_port()) cl = ClipperConnection(cm) - else: + elif not cleanup: # Don't connect after cleanup cl.connect() return cl From 8bd5b3bfcf96308de306a040bb2e2c380809d3ee Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 29 May 2018 17:48:11 -0700 Subject: [PATCH 13/63] Format code --- integration-tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-tests/test_utils.py b/integration-tests/test_utils.py index 69d7c721a..075c96de3 100644 --- a/integration-tests/test_utils.py +++ b/integration-tests/test_utils.py @@ -97,7 +97,7 @@ def create_docker_connection(cleanup=True, clipper_rpc_port=find_unbound_port(), redis_port=find_unbound_port()) cl = ClipperConnection(cm) - elif not cleanup: # Don't connect after cleanup + elif not cleanup: # Don't connect after cleanup cl.connect() return cl From 7ecc8f24fdaebff65bf5c2dafcb7d17b43174cdf Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Wed, 30 May 2018 12:33:57 -0700 Subject: [PATCH 14/63] Add debug line; tests pass locally; suspecting dangling containers --- bin/run_unittests.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/run_unittests.sh b/bin/run_unittests.sh index b4be710f6..d9acb3b70 100755 --- a/bin/run_unittests.sh +++ b/bin/run_unittests.sh @@ -129,6 +129,9 @@ function run_frontend_tests { function run_integration_tests { echo -e "\nRunning integration tests\n\n" cd $DIR + + docker ps + python ../integration-tests/clipper_admin_tests.py python ../integration-tests/many_apps_many_models.py 2 3 python ../integration-tests/deploy_pyspark_models.py From f62922b8edf2b1a2880c9f6bef7b38bd6ca3c126 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Thu, 31 May 2018 14:40:35 -0700 Subject: [PATCH 15/63] Checkout parallel build docker --- bin/build_docker_images.sh | 75 +++++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 22 deletions(-) diff --git a/bin/build_docker_images.sh b/bin/build_docker_images.sh index bd3c7884c..d0fba4c88 100755 --- a/bin/build_docker_images.sh +++ b/bin/build_docker_images.sh @@ -202,9 +202,16 @@ create_image () { local public=$3 # Push the built images to Docker Hub under # the clipper namespace. Must have credentials. + + if [ "$#" -eq 4 ]; then + local rpc_version="--build-arg RPC_VERSION=$4" + else + local rpc_version="" + fi + echo "Building $namespace/$image:$sha_tag from file $dockerfile" - time docker build --build-arg CODE_VERSION=$sha_tag -t $namespace/$image:$sha_tag \ + time docker build --build-arg CODE_VERSION=$sha_tag $rpc_version -t $namespace/$image:$sha_tag \ -f dockerfiles/$dockerfile $CLIPPER_ROOT docker tag $namespace/$image:$sha_tag $namespace/$image:$version_tag @@ -242,33 +249,57 @@ build_images () { # Build Clipper core images create_image lib_base ClipperLibBaseDockerfile $private - create_image query_frontend QueryFrontendDockerfile $public - create_image management_frontend ManagementFrontendDockerfile $public - create_image dev ClipperDevDockerfile $public - create_image py35-dev ClipperPy35DevDockerfile $public - create_image unittests ClipperTestsDockerfile $private - create_image py35tests ClipperPy35TestsDockerfile $private + + # Build the rest in parallel + create_image query_frontend QueryFrontendDockerfile $public & + create_image management_frontend ManagementFrontendDockerfile $public & + create_image dev ClipperDevDockerfile $public & + create_image py35-dev ClipperPy35DevDockerfile $public & + create_image unittests ClipperTestsDockerfile $private & + create_image py35tests ClipperPy35TestsDockerfile $private & + wait # Build containers for other languages - create_image spark-scala-container SparkScalaContainerDockerfile $public - create_image r-container-base RContainerDockerfile $public + create_image spark-scala-container SparkScalaContainerDockerfile $public & + create_image r-container-base RContainerDockerfile $public & # First build Python base image - create_image py-rpc Py2RPCDockerfile $public - create_image py35-rpc Py35RPCDockerfile $public - create_image py36-rpc Py36RPCDockerfile $public - create_image sum-container SumDockerfile $private - create_image noop-container NoopDockerfile $public - create_image python-closure-container PyClosureContainerDockerfile $public - create_image python35-closure-container Py35ClosureContainerDockerfile $public - create_image python36-closure-container Py36ClosureContainerDockerfile $public - create_image pyspark-container PySparkContainerDockerfile $public - create_image tf-container TensorFlowDockerfile $public - create_image pytorch-container PyTorchContainerDockerfile $public + create_image py-rpc Py2RPCDockerfile $public & + create_image py35-rpc Py35RPCDockerfile $public & + create_image py36-rpc Py36RPCDockerfile $public & + wait + + create_image sum-container SumDockerfile $private & + create_image noop-container NoopDockerfile $public & + + create_image python-closure-container PyClosureContainerDockerfile $public py & + create_image python35-closure-container PyClosureContainerDockerfile $public py35 & + create_image python36-closure-container PyClosureContainerDockerfile $public py36 & + wait + + create_image pyspark-container PySparkContainerDockerfile $public py & + create_image pyspark35-container PySparkContainerDockerfile $public py35 & + create_image pyspark36-container PySparkContainerDockerfile $public py36 & + + create_image tf-container TensorFlowDockerfile $public py & + create_image tf35-container TensorFlowDockerfile $public py35 & + create_image tf36-container TensorFlowDockerfile $public py36 & + wait + + create_image pytorch-container PyTorchContainerDockerfile $public py & + create_image pytorch35-container PyTorchContainerDockerfile $public py35 & + create_image pytorch36-container PyTorchContainerDockerfile $public py36 & + # See issue #475 - # create_image caffe2-onnx-container Caffe2OnnxDockerfile $public - create_image mxnet-container MXNetContainerDockerfile $public + # create_image caffe2-onnx-container Caffe2OnnxDockerfile $public py + # create_image caffe235-onnx-container Caffe2OnnxDockerfile $public py35 + # create_image caffe236-onnx-container Caffe2OnnxDockerfile $public py36 + + create_image mxnet-container MXNetContainerDockerfile $public py & + create_image mxnet35-container MXNetContainerDockerfile $public py35 & + create_image mxnet36-container MXNetContainerDockerfile $public py36 & + wait # Build Metric Monitor image - no dependency create_image frontend-exporter FrontendExporterDockerfile $public From bcbda0b516d4a4e81080f404ac93579a0f20eba0 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Thu, 31 May 2018 14:44:54 -0700 Subject: [PATCH 16/63] Make persistent test less verbose --- src/libclipper/test/persistent_state_test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libclipper/test/persistent_state_test.cpp b/src/libclipper/test/persistent_state_test.cpp index 75237593b..2555250d4 100644 --- a/src/libclipper/test/persistent_state_test.cpp +++ b/src/libclipper/test/persistent_state_test.cpp @@ -39,12 +39,12 @@ TEST_F(StateDBTest, TestSinglePutGet) { TEST_F(StateDBTest, TestManyPutGet) { ASSERT_EQ(db_.num_entries(), 0); - for (int i = 0; i < 5000; ++i) { + for (int i = 0; i < 10; ++i) { StateKey key = std::make_tuple("Dan", i, 1234); ASSERT_TRUE(db_.put(key, "valuestring")); } - ASSERT_EQ(db_.num_entries(), 5000); - for (int i = 0; i < 5000; ++i) { + ASSERT_EQ(db_.num_entries(), 10); + for (int i = 0; i < 10; ++i) { StateKey key = std::make_tuple("Dan", i, 1234); ASSERT_TRUE(db_.remove(key)); } From b963d287773802d495cb0c015859a25fb8dc6f49 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Thu, 31 May 2018 14:50:33 -0700 Subject: [PATCH 17/63] Make Unittest all parallel --- bin/run_unittests.sh | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/bin/run_unittests.sh b/bin/run_unittests.sh index d9acb3b70..8b697911b 100755 --- a/bin/run_unittests.sh +++ b/bin/run_unittests.sh @@ -130,24 +130,30 @@ function run_integration_tests { echo -e "\nRunning integration tests\n\n" cd $DIR + echo "GREPTHIS Docker State before:" docker ps - python ../integration-tests/clipper_admin_tests.py - python ../integration-tests/many_apps_many_models.py 2 3 - python ../integration-tests/deploy_pyspark_models.py - python ../integration-tests/deploy_pyspark_pipeline_models.py - python ../integration-tests/deploy_pyspark_sparkml_models.py - python ../integration-tests/kubernetes_integration_test.py - python ../integration-tests/kubernetes_multi_frontend.py - python ../integration-tests/deploy_tensorflow_models.py - python ../integration-tests/deploy_mxnet_models.py - python ../integration-tests/deploy_pytorch_models.py + python ../integration-tests/clipper_admin_tests.py & + python ../integration-tests/many_apps_many_models.py 2 3 & + python ../integration-tests/deploy_pyspark_models.py & + python ../integration-tests/deploy_pyspark_pipeline_models.py & + python ../integration-tests/deploy_pyspark_sparkml_models.py & + python ../integration-tests/kubernetes_integration_test.py & + python ../integration-tests/kubernetes_multi_frontend.py & + python ../integration-tests/deploy_tensorflow_models.py & + python ../integration-tests/deploy_mxnet_models.py & + python ../integration-tests/deploy_pytorch_models.py & # See issue #475 # python ../integration-tests/deploy_pytorch_to_caffe2_with_onnx.py - ../integration-tests/r_integration_test/rclipper_test.sh - python ../integration-tests/clipper_metric_docker.py - python ../integration-tests/clipper_metric_kube.py - python ../integration-tests/multi_tenency_test.py + ../integration-tests/r_integration_test/rclipper_test.sh & + python ../integration-tests/clipper_metric_docker.py & + python ../integration-tests/clipper_metric_kube.py & + python ../integration-tests/multi_tenency_test.py & + wait + + echo "GREPTHIS Docker State After" + docker ps + echo "Exit code: $?" echo "GREPTHIS Done running unit tests" } From 044d7243f96fd5d9189350eea329d1addc023fb4 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Thu, 31 May 2018 14:53:17 -0700 Subject: [PATCH 18/63] Make maven less verbose --- bin/run_unittests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/run_unittests.sh b/bin/run_unittests.sh index 8b697911b..08bf15822 100755 --- a/bin/run_unittests.sh +++ b/bin/run_unittests.sh @@ -91,7 +91,7 @@ function run_jvm_container_tests { echo "Running JVM container tests..." cd $DIR cd ../containers/jvm - mvn test + mvn test -q } function run_r_container_tests { From 1c9c644823ee3fa1a6a454bcd4ac5ffecaf5b1fb Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Thu, 31 May 2018 15:20:21 -0700 Subject: [PATCH 19/63] Change persistent_state_test 10->100 --- src/libclipper/test/persistent_state_test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libclipper/test/persistent_state_test.cpp b/src/libclipper/test/persistent_state_test.cpp index 2555250d4..47d61f48f 100644 --- a/src/libclipper/test/persistent_state_test.cpp +++ b/src/libclipper/test/persistent_state_test.cpp @@ -39,12 +39,12 @@ TEST_F(StateDBTest, TestSinglePutGet) { TEST_F(StateDBTest, TestManyPutGet) { ASSERT_EQ(db_.num_entries(), 0); - for (int i = 0; i < 10; ++i) { + for (int i = 0; i < 100; ++i) { StateKey key = std::make_tuple("Dan", i, 1234); ASSERT_TRUE(db_.put(key, "valuestring")); } - ASSERT_EQ(db_.num_entries(), 10); - for (int i = 0; i < 10; ++i) { + ASSERT_EQ(db_.num_entries(), 100); + for (int i = 0; i < 100; ++i) { StateKey key = std::make_tuple("Dan", i, 1234); ASSERT_TRUE(db_.remove(key)); } From 3af90eb284c88653d2dfa5fe77690f689cd21ec6 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Thu, 31 May 2018 19:11:51 -0700 Subject: [PATCH 20/63] Refactor tests to include cluster name --- bin/run_unittests.sh | 1 + clipper_admin/clipper_admin/clipper_admin.py | 7 +- .../clipper_admin/container_manager.py | 11 ++- .../docker/docker_container_manager.py | 7 +- .../kubernetes_container_manager.py | 2 +- integration-tests/clipper_admin_tests.py | 15 ++-- integration-tests/clipper_metric_docker.py | 3 +- integration-tests/clipper_metric_kube.py | 12 +-- integration-tests/deploy_mxnet_models.py | 14 ++-- integration-tests/deploy_pyspark_models.py | 12 ++- .../deploy_pyspark_pipeline_models.py | 11 ++- .../deploy_pyspark_sparkml_models.py | 11 ++- integration-tests/deploy_pytorch_models.py | 11 ++- .../deploy_pytorch_to_caffe2_with_onnx.py | 11 ++- integration-tests/deploy_tensorflow_models.py | 11 ++- integration-tests/deploy_xgboost_models.py | 11 ++- .../kubernetes_integration_test.py | 9 ++- .../kubernetes_multi_frontend.py | 9 ++- integration-tests/many_apps_many_models.py | 12 ++- integration-tests/multi_tenency_test.py | 12 +-- integration-tests/test_utils.py | 77 +++++++++++-------- 21 files changed, 172 insertions(+), 97 deletions(-) diff --git a/bin/run_unittests.sh b/bin/run_unittests.sh index 08bf15822..001b9b0fb 100755 --- a/bin/run_unittests.sh +++ b/bin/run_unittests.sh @@ -149,6 +149,7 @@ function run_integration_tests { python ../integration-tests/clipper_metric_docker.py & python ../integration-tests/clipper_metric_kube.py & python ../integration-tests/multi_tenency_test.py & + python ../integration-tests/multi_tenency_test.py --kubernetes & wait echo "GREPTHIS Docker State After" diff --git a/clipper_admin/clipper_admin/clipper_admin.py b/clipper_admin/clipper_admin/clipper_admin.py index d6872b380..5435c7c40 100644 --- a/clipper_admin/clipper_admin/clipper_admin.py +++ b/clipper_admin/clipper_admin/clipper_admin.py @@ -1247,14 +1247,17 @@ def stop_all_model_containers(self): self.cm.stop_all_model_containers() logger.info("Stopped all Clipper model containers") - def stop_all(self): + def stop_all(self, graceful=True): """Stops all processes that were started via Clipper admin commands. This includes the query and management frontend Docker containers and all model containers. If you started Redis independently, this will not affect Redis. It can also be called without calling ``connect`` first. + + If graceful=False, Clipper will issue Docker Kill if it's in the Docker Mode. This parameter + will take not effect in Kubernetes. """ - self.cm.stop_all() + self.cm.stop_all(graceful=graceful) logger.info("Stopped all Clipper cluster and all model containers") def test_predict_function(self, query, func, input_type): diff --git a/clipper_admin/clipper_admin/container_manager.py b/clipper_admin/clipper_admin/container_manager.py index 52e1e8735..b745d92cc 100644 --- a/clipper_admin/clipper_admin/container_manager.py +++ b/clipper_admin/clipper_admin/container_manager.py @@ -160,8 +160,15 @@ def stop_all_model_containers(self): return @abc.abstractmethod - def stop_all(self): - """Stop all resources associated with Clipper.""" + def stop_all(self, graceful=True): + """Stop all resources associated with Clipper. + + Parameters + ---------- + graceful : bool + If set to True, Clipper will try to shutdown all containers gracefully. + This option will only work in Docker (Using Docker stop instead of kill). + """ pass @abc.abstractmethod diff --git a/clipper_admin/clipper_admin/docker/docker_container_manager.py b/clipper_admin/clipper_admin/docker/docker_container_manager.py index fdaf387dc..ae72bbfdc 100644 --- a/clipper_admin/clipper_admin/docker/docker_container_manager.py +++ b/clipper_admin/clipper_admin/docker/docker_container_manager.py @@ -415,7 +415,7 @@ def stop_all_model_containers(self): for c in containers: c.stop() - def stop_all(self): + def stop_all(self, graceful=True): containers = self.docker_client.containers.list( filters={ "label": @@ -423,7 +423,10 @@ def stop_all(self): key=CLIPPER_DOCKER_LABEL, val=self.cluster_name) }) for c in containers: - c.stop() + if graceful: + c.stop() + else: + c.kill() def get_admin_addr(self): return "{host}:{port}".format( diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index 82cc7dbe5..648670b28 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -436,7 +436,7 @@ def stop_all_model_containers(self): "Exception deleting kubernetes deployments: {}".format(e)) raise e - def stop_all(self): + def stop_all(self, graceful=True): logger.info("Stopping all running Clipper resources") cluster_selecter = "{cluster_label}={cluster_name}".format( diff --git a/integration-tests/clipper_admin_tests.py b/integration-tests/clipper_admin_tests.py index e97e48be1..07847b1f1 100644 --- a/integration-tests/clipper_admin_tests.py +++ b/integration-tests/clipper_admin_tests.py @@ -14,8 +14,10 @@ import requests import tempfile import shutil +import random from argparse import ArgumentParser import logging + from test_utils import get_docker_client, create_docker_connection, fake_model_data cur_dir = os.path.dirname(os.path.abspath(__file__)) @@ -35,14 +37,16 @@ class ClipperManagerTestCaseShort(unittest.TestCase): - @classmethod - def tearDownClass(self): - self.clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) def setUp(self): + new_name = "cluster-{}".format(random.randint(0,5000)) self.clipper_conn = create_docker_connection( - cleanup=True, start_clipper=True) + cleanup=False, start_clipper=True, new_name=new_name) + self.name = new_name + + def tearDown(self): + self.clipper_conn = create_docker_connection( + cleanup=True, start_clipper=False, cleanup_name=self.name) def test_register_model_correct(self): input_type = "doubles" @@ -772,7 +776,6 @@ def test_remove_inactive_container(self): data=req_json) result = response.json() self.assertEqual(response.status_code, requests.codes.ok) - #print(result["default_explanation"]) self.assertEqual(result["default"], False) # one of the containers should go inactive diff --git a/integration-tests/clipper_metric_docker.py b/integration-tests/clipper_metric_docker.py index 80e9086a6..d460f8c1a 100644 --- a/integration-tests/clipper_metric_docker.py +++ b/integration-tests/clipper_metric_docker.py @@ -6,6 +6,7 @@ import subprocess import sys import time +import random import numpy as np import requests @@ -85,7 +86,7 @@ def log_docker_ps(clipper_conn): logger = logging.getLogger(__name__) logger.info("Start Metric Test (0/1): Running 2 Replicas") - clipper_conn = ClipperConnection(DockerContainerManager(redis_port=6380)) + clipper_conn = ClipperConnection(DockerContainerManager(cluster_name='cluster-'.format(random.randint(0,50000)), redis_port=6380)) clipper_conn.start_clipper() python_deployer.create_endpoint( clipper_conn, "simple-example", "doubles", feature_sum, num_replicas=2) diff --git a/integration-tests/clipper_metric_kube.py b/integration-tests/clipper_metric_kube.py index dd7bc0f74..0b6ec4181 100644 --- a/integration-tests/clipper_metric_kube.py +++ b/integration-tests/clipper_metric_kube.py @@ -137,9 +137,11 @@ def check_target_health(metric_addr): if __name__ == "__main__": + import random + cluster_name = 'cluster-{}'.format(random.randint(0,5000)) try: - clipper_conn = create_kubernetes_connection( - cleanup=True, start_clipper=True) + clipper_conn = create_kubernetes_connection(new_name=cluster_name, + cleanup=False, start_clipper=True) time.sleep(60) logger.info(clipper_conn.cm.get_query_addr()) try: @@ -196,20 +198,20 @@ def check_target_health(metric_addr): log_clipper_state(clipper_conn) logger.info("SUCCESS") create_kubernetes_connection( - cleanup=True, start_clipper=False, connect=False) + cleanup=True, start_clipper=False, connect=False, cleanup_name=cluster_name) logger.info("EXITING") os._exit(0) except BenchmarkException as e: log_clipper_state(clipper_conn) logger.exception("BenchmarkException") create_kubernetes_connection( - cleanup=True, start_clipper=False, connect=False) + cleanup=True, start_clipper=False, connect=False, cleanup_name=cluster_name) sys.exit(1) except ClipperException as e: log_clipper_state(clipper_conn) logger.exception("ClipperException") create_kubernetes_connection( - cleanup=True, start_clipper=False, connect=False) + cleanup=True, start_clipper=False, connect=False, cleanup_name=cluster_name) sys.exit(1) except Exception as e: logger.exception("Exception: {}".format(e)) diff --git a/integration-tests/deploy_mxnet_models.py b/integration-tests/deploy_mxnet_models.py index b0c12e75f..656839be3 100644 --- a/integration-tests/deploy_mxnet_models.py +++ b/integration-tests/deploy_mxnet_models.py @@ -8,6 +8,7 @@ import numpy as np import time import logging +import random cur_dir = os.path.dirname(os.path.abspath(__file__)) @@ -89,9 +90,12 @@ def get_test_point(): if __name__ == "__main__": pos_label = 3 + + import random + cluster_name = "cluster-{}".format(random.randint(0, 5000)) try: - clipper_conn = create_docker_connection( - cleanup=True, start_clipper=True) + clipper_conn = create_docker_connection(new_name=cluster_name, + cleanup=False, start_clipper=True) train_path = os.path.join(cur_dir, "data/train.data") data_iter = mx.io.CSVIter( @@ -144,14 +148,14 @@ def get_test_point(): except BenchmarkException: logger.exception("BenchmarkException") clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) else: clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) except Exception: logger.exception("Exception") clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) diff --git a/integration-tests/deploy_pyspark_models.py b/integration-tests/deploy_pyspark_models.py index ab6139d69..b11aee4a8 100644 --- a/integration-tests/deploy_pyspark_models.py +++ b/integration-tests/deploy_pyspark_models.py @@ -120,6 +120,10 @@ def get_test_point(): if __name__ == "__main__": pos_label = 3 + + import random + cluster_name = "cluster-{}".format(random.randint(0, 5000)) + try: spark = SparkSession\ .builder\ @@ -127,7 +131,7 @@ def get_test_point(): .getOrCreate() sc = spark.sparkContext clipper_conn = create_docker_connection( - cleanup=True, start_clipper=True) + cleanup=False, start_clipper=True, new_name=cluster_name) train_path = os.path.join(cur_dir, "data/train.data") trainRDD = sc.textFile(train_path).map( @@ -180,14 +184,14 @@ def get_test_point(): log_clipper_state(clipper_conn) logger.exception("BenchmarkException") clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) else: spark.stop() clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) except Exception: logger.exception("Exception") clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) diff --git a/integration-tests/deploy_pyspark_pipeline_models.py b/integration-tests/deploy_pyspark_pipeline_models.py index 905fa6df5..35c6705a4 100644 --- a/integration-tests/deploy_pyspark_pipeline_models.py +++ b/integration-tests/deploy_pyspark_pipeline_models.py @@ -55,6 +55,9 @@ def predict(spark, pipeline, xs): def run_test(): + import random + cluster_name = "cluster-{}".format(random.randint(0, 5000)) + spark = SparkSession\ .builder\ .appName("clipper-pyspark")\ @@ -93,7 +96,7 @@ def run_test(): try: clipper_conn = create_docker_connection( - cleanup=True, start_clipper=True) + cleanup=False, start_clipper=True, new_name=cluster_name) try: clipper_conn.register_application(app_name, "strings", @@ -167,17 +170,17 @@ def run_test(): log_clipper_state() logger.exception("BenchmarkException") clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) else: spark.stop() clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) logger.info("ALL TESTS PASSED") except Exception as e: logger.exception("Exception") clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) diff --git a/integration-tests/deploy_pyspark_sparkml_models.py b/integration-tests/deploy_pyspark_sparkml_models.py index 97bce65af..d54371184 100644 --- a/integration-tests/deploy_pyspark_sparkml_models.py +++ b/integration-tests/deploy_pyspark_sparkml_models.py @@ -115,6 +115,9 @@ def get_test_point(): if __name__ == "__main__": pos_label = 3 + + import random + cluster_name = "cluster-{}".format(random.randint(0, 5000)) try: spark = SparkSession\ .builder\ @@ -122,7 +125,7 @@ def get_test_point(): .getOrCreate() sc = spark.sparkContext clipper_conn = create_docker_connection( - cleanup=True, start_clipper=True) + cleanup=False, start_clipper=True, new_name=cluster_name) train_path = os.path.join(cur_dir, "data/train.data") trainRDD = spark.sparkContext.textFile(train_path).map( @@ -154,14 +157,14 @@ def get_test_point(): log_clipper_state(clipper_conn) logger.exception("BenchmarkException") clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) else: spark.stop() clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) except Exception: logger.exception("Exception") clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) diff --git a/integration-tests/deploy_pytorch_models.py b/integration-tests/deploy_pytorch_models.py index d8413cd81..08a8be577 100644 --- a/integration-tests/deploy_pytorch_models.py +++ b/integration-tests/deploy_pytorch_models.py @@ -159,9 +159,12 @@ def __getitem__(self, index): if __name__ == "__main__": pos_label = 3 + + import random + cluster_name = "cluster-{}".format(random.randint(0, 5000)) try: clipper_conn = create_docker_connection( - cleanup=True, start_clipper=True) + cleanup=False, start_clipper=True, new_name=cluster_name) train_path = os.path.join(cur_dir, "data/train.data") train_x, train_y = parsedata(train_path, pos_label) @@ -202,13 +205,13 @@ def __getitem__(self, index): log_clipper_state(clipper_conn) logger.exception("BenchmarkException") clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) else: clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) except Exception: logger.exception("Exception") clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) diff --git a/integration-tests/deploy_pytorch_to_caffe2_with_onnx.py b/integration-tests/deploy_pytorch_to_caffe2_with_onnx.py index 8fd9f0aeb..ed5445169 100644 --- a/integration-tests/deploy_pytorch_to_caffe2_with_onnx.py +++ b/integration-tests/deploy_pytorch_to_caffe2_with_onnx.py @@ -167,9 +167,12 @@ def __getitem__(self, index): if __name__ == "__main__": pos_label = 3 + + import random + cluster_name = "cluster-{}".format(random.randint(0, 5000)) try: clipper_conn = create_docker_connection( - cleanup=True, start_clipper=True) + cleanup=False, start_clipper=True, new_name=cluster_name) train_path = os.path.join(cur_dir, "data/train.data") train_x, train_y = parsedata(train_path, pos_label) @@ -218,13 +221,13 @@ def __getitem__(self, index): log_clipper_state(clipper_conn) logger.exception("BenchmarkException") clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) else: clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) except Exception: logger.exception("Exception") clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) diff --git a/integration-tests/deploy_tensorflow_models.py b/integration-tests/deploy_tensorflow_models.py index e9ca32cc9..8cd35a27e 100644 --- a/integration-tests/deploy_tensorflow_models.py +++ b/integration-tests/deploy_tensorflow_models.py @@ -150,10 +150,13 @@ def get_test_point(): if __name__ == "__main__": pos_label = 3 + + import random + cluster_name = "cluster-{}".format(random.randint(0, 5000)) try: sess = None clipper_conn = create_docker_connection( - cleanup=True, start_clipper=True) + cleanup=False, start_clipper=True, new_name=cluster_name) train_path = os.path.join(cur_dir, "data/train.data") (X_train, y_train) = parseData(train_path, pos_label) @@ -218,13 +221,13 @@ def get_test_point(): log_clipper_state(clipper_conn) logger.exception("BenchmarkException") clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) else: clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) except Exception: logger.exception("Exception") clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) diff --git a/integration-tests/deploy_xgboost_models.py b/integration-tests/deploy_xgboost_models.py index 21a2f9c84..ac1cc3da4 100644 --- a/integration-tests/deploy_xgboost_models.py +++ b/integration-tests/deploy_xgboost_models.py @@ -82,9 +82,12 @@ def get_test_point(): if __name__ == "__main__": pos_label = 3 + + import random + cluster_name = "cluster-{}".format(random.randint(0, 5000)) try: clipper_conn = create_docker_connection( - cleanup=True, start_clipper=True) + cleanup=False, start_clipper=True, new_name=cluster_name) try: clipper_conn.register_application(app_name, "integers", @@ -124,13 +127,13 @@ def predict(xs): log_clipper_state(clipper_conn) logger.exception("BenchmarkException") clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) else: clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) except Exception as e: logger.exception("Exception") clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) diff --git a/integration-tests/kubernetes_integration_test.py b/integration-tests/kubernetes_integration_test.py index 81b2e9fae..b5772c468 100644 --- a/integration-tests/kubernetes_integration_test.py +++ b/integration-tests/kubernetes_integration_test.py @@ -147,14 +147,19 @@ def test_kubernetes(clipper_conn, num_apps, num_models): pass try: # Test without proxy first + import random + + cluster_name = "cluster-{}".format(random.randint(0, 5000)) + clipper_conn = create_kubernetes_connection( - cleanup=True, start_clipper=True, with_proxy=False) + cleanup=False, start_clipper=True, with_proxy=False, new_name=cluster_name) test_kubernetes(clipper_conn, num_apps, num_models) clipper_conn.stop_all() # Test with proxy. Assumes proxy is running at 127.0.0.1:8080 + proxy_name = "cluster-{}".format(random.randint(0,5000)) clipper_conn = create_kubernetes_connection( - cleanup=True, start_clipper=True, with_proxy=True) + cleanup=True, start_clipper=True, with_proxy=True, cleanup_name=cluster_name, new_name=proxy_name) test_kubernetes(clipper_conn, 1, 1) clipper_conn.stop_all() diff --git a/integration-tests/kubernetes_multi_frontend.py b/integration-tests/kubernetes_multi_frontend.py index 81d108cfe..e832ffacb 100644 --- a/integration-tests/kubernetes_multi_frontend.py +++ b/integration-tests/kubernetes_multi_frontend.py @@ -100,9 +100,12 @@ def create_and_test_app(clipper_conn, name): if __name__ == "__main__": + import random + + cluster_name = "cluster-{}".format(random.randint(0, 5000)) try: clipper_conn = create_kubernetes_connection( - cleanup=True, start_clipper=True, num_frontend_replicas=2) + cleanup=False, start_clipper=True, num_frontend_replicas=2, new_name=cluster_name) time.sleep(10) print(clipper_conn.cm.get_query_addr()) try: @@ -156,12 +159,12 @@ def create_and_test_app(clipper_conn, name): except BenchmarkException as e: log_clipper_state(clipper_conn) logger.exception("BenchmarkException") - create_kubernetes_connection(cleanup=True, start_clipper=False) + create_kubernetes_connection(cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) except ClipperException as e: log_clipper_state(clipper_conn) logger.exception("ClipperException") - create_kubernetes_connection(cleanup=True, start_clipper=False) + create_kubernetes_connection(cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) except Exception as e: logger.exception("Exception: {}".format(e)) diff --git a/integration-tests/many_apps_many_models.py b/integration-tests/many_apps_many_models.py index 4b6244e71..ab7b8d331 100644 --- a/integration-tests/many_apps_many_models.py +++ b/integration-tests/many_apps_many_models.py @@ -96,6 +96,10 @@ def create_and_test_app(clipper_conn, name, num_models): if __name__ == "__main__": num_apps = 6 num_models = 8 + + import random + cluster_name = "cluster-{}".format(random.randint(0, 5000)) + try: if len(sys.argv) > 1: num_apps = int(sys.argv[1]) @@ -107,7 +111,7 @@ def create_and_test_app(clipper_conn, name, num_models): pass try: clipper_conn = create_docker_connection( - cleanup=True, start_clipper=True) + cleanup=False, start_clipper=True, new_name=cluster_name) time.sleep(10) try: logger.info("Running integration test with %d apps and %d models" % @@ -126,11 +130,11 @@ def create_and_test_app(clipper_conn, name, num_models): except BenchmarkException as e: log_clipper_state(clipper_conn) logger.exception("BenchmarkException") - create_docker_connection(cleanup=True, start_clipper=False) + create_docker_connection(cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) else: - create_docker_connection(cleanup=True, start_clipper=False) + create_docker_connection(cleanup=True, start_clipper=False, cleanup_name=cluster_name) except Exception as e: logger.exception("Exception") - create_docker_connection(cleanup=True, start_clipper=False) + create_docker_connection(cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) diff --git a/integration-tests/multi_tenency_test.py b/integration-tests/multi_tenency_test.py index f7142b9ab..384a0364d 100644 --- a/integration-tests/multi_tenency_test.py +++ b/integration-tests/multi_tenency_test.py @@ -9,11 +9,8 @@ import os import time from test_utils import create_kubernetes_connection, create_docker_connection -import click -@click.command() -@click.option('--kubernetes', is_flag=True) def test(kubernetes): conn_1 = create('cluster-1', use_kubernetes=kubernetes) conn_2 = create('cluster-2', use_kubernetes=kubernetes) @@ -33,10 +30,10 @@ def test(kubernetes): def create(name, use_kubernetes=False): if use_kubernetes: conn = create_kubernetes_connection( - cleanup=True, start_clipper=True, name=name) + cleanup=False, start_clipper=True, new_name=name) else: conn = create_docker_connection( - cleanup=True, start_clipper=True, name=name) + cleanup=False, start_clipper=True, new_name=name) return conn @@ -75,4 +72,7 @@ def predict_(addr, x, batch=False): if __name__ == '__main__': - test() + if sys.argv[1] == '--kubernetes': + test(kubernetes=True) + else: + test(kubernetes=False) diff --git a/integration-tests/test_utils.py b/integration-tests/test_utils.py index 075c96de3..32f2a31cb 100644 --- a/integration-tests/test_utils.py +++ b/integration-tests/test_utils.py @@ -64,23 +64,40 @@ def find_unbound_port(): def create_docker_connection(cleanup=True, start_clipper=True, - name='default-cluster'): + cleanup_name='default-cluster', + new_name='default-cluster' + ): logger.info("Creating DockerContainerManager") - cm = DockerContainerManager( - cluster_name=name, - clipper_query_port=find_unbound_port(), - clipper_management_port=find_unbound_port(), - clipper_rpc_port=find_unbound_port(), - redis_port=find_unbound_port()) - cl = ClipperConnection(cm) + cl = None + assert cleanup or start_clipper, "You must set at least one of {cleanup, start_clipper} to be true." + if cleanup: - cl.stop_all() + logger.info("Cleaning up Docker cluster {}".format(cleanup_name)) + cm = DockerContainerManager( + cluster_name=cleanup_name, + clipper_query_port=find_unbound_port(), + clipper_management_port=find_unbound_port(), + clipper_rpc_port=find_unbound_port(), + redis_port=find_unbound_port(), + ) + cl = ClipperConnection(cm) + cl.stop_all(graceful=False) docker_client = get_docker_client() docker_client.containers.prune(filters={"label": CLIPPER_DOCKER_LABEL}) + if start_clipper: # Try to start Clipper in a retry loop here to address flaky tests # as described in https://github.com/ucbrise/clipper/issues/352 + logger.info("Starting up Docker cluster {}".format(new_name)) + while True: + cm = DockerContainerManager( + cluster_name=new_name, + clipper_query_port=find_unbound_port(), + clipper_management_port=find_unbound_port(), + clipper_rpc_port=find_unbound_port(), + redis_port=find_unbound_port()) + cl = ClipperConnection(cm) try: logger.info("Starting Clipper") cl.start_clipper() @@ -90,15 +107,6 @@ def create_docker_connection(cleanup=True, logger.info( "Problem starting Clipper: {}\nTrying again.".format(e)) cl.stop_all() - cm = DockerContainerManager( - cluster_name=name, - clipper_query_port=find_unbound_port(), - clipper_management_port=find_unbound_port(), - clipper_rpc_port=find_unbound_port(), - redis_port=find_unbound_port()) - cl = ClipperConnection(cm) - elif not cleanup: # Don't connect after cleanup - cl.connect() return cl @@ -107,21 +115,29 @@ def create_kubernetes_connection(cleanup=True, connect=True, with_proxy=False, num_frontend_replicas=1, - name='default-cluster'): + cleanup_name='default-cluster', + new_name='default-cluster', + connect_name='default-cluster'): logger.info("Creating KubernetesContainerManager") + cl = None + assert cleanup or start_clipper, "You must set at least one of {cleanup, start_clipper} to be true." + if with_proxy: - cm = KubernetesContainerManager( - cluster_name=name, kubernetes_proxy_addr="127.0.0.1:8080") + kubernetes_proxy_addr = "127.0.0.1:8080" else: - cm = KubernetesContainerManager(cluster_name=name) - cl = ClipperConnection(cm) + kubernetes_proxy_addr = None + if cleanup: + logger.info("Cleaning up Kubernetes Cluster {}".format(cleanup_name)) + cm = KubernetesContainerManager(cluster_name=cleanup_name, kubernetes_proxy_addr=kubernetes_proxy_addr) + cl = ClipperConnection(cm) cl.stop_all() - # Give kubernetes some time to clean up - time.sleep(20) logger.info("Done cleaning up clipper") + if start_clipper: - logger.info("Starting Clipper") + logger.info("Starting up Kubernetes Cluster {}".format(new_name)) + cm = KubernetesContainerManager(cluster_name=cleanup_name, kubernetes_proxy_addr=kubernetes_proxy_addr) + cl = ClipperConnection(cm) cl.start_clipper( query_frontend_image= "568959175238.dkr.ecr.us-west-1.amazonaws.com/clipper/query_frontend:{}". @@ -130,15 +146,16 @@ def create_kubernetes_connection(cleanup=True, "568959175238.dkr.ecr.us-west-1.amazonaws.com/clipper/management_frontend:{}". format(clipper_version), num_frontend_replicas=num_frontend_replicas) - time.sleep(1) + if connect: try: + cm = KubernetesContainerManager(cluster_name=connect_name, kubernetes_proxy_addr=kubernetes_proxy_addr) + cl = ClipperConnection(cm) cl.connect() except Exception: pass - except ClipperException: - pass - return cl + + return cl def log_clipper_state(cl): From ee71d06a55fc59d8ed4140efdc1ecea308672a88 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Thu, 31 May 2018 19:12:51 -0700 Subject: [PATCH 21/63] Format code --- integration-tests/clipper_admin_tests.py | 3 +-- integration-tests/clipper_metric_docker.py | 5 ++++- integration-tests/clipper_metric_kube.py | 21 +++++++++++++------ integration-tests/deploy_mxnet_models.py | 4 ++-- .../kubernetes_integration_test.py | 13 +++++++++--- .../kubernetes_multi_frontend.py | 11 +++++++--- integration-tests/many_apps_many_models.py | 9 +++++--- integration-tests/test_utils.py | 15 ++++++++----- 8 files changed, 56 insertions(+), 25 deletions(-) diff --git a/integration-tests/clipper_admin_tests.py b/integration-tests/clipper_admin_tests.py index 07847b1f1..2a725baeb 100644 --- a/integration-tests/clipper_admin_tests.py +++ b/integration-tests/clipper_admin_tests.py @@ -37,9 +37,8 @@ class ClipperManagerTestCaseShort(unittest.TestCase): - def setUp(self): - new_name = "cluster-{}".format(random.randint(0,5000)) + new_name = "cluster-{}".format(random.randint(0, 5000)) self.clipper_conn = create_docker_connection( cleanup=False, start_clipper=True, new_name=new_name) self.name = new_name diff --git a/integration-tests/clipper_metric_docker.py b/integration-tests/clipper_metric_docker.py index d460f8c1a..9a2009f45 100644 --- a/integration-tests/clipper_metric_docker.py +++ b/integration-tests/clipper_metric_docker.py @@ -86,7 +86,10 @@ def log_docker_ps(clipper_conn): logger = logging.getLogger(__name__) logger.info("Start Metric Test (0/1): Running 2 Replicas") - clipper_conn = ClipperConnection(DockerContainerManager(cluster_name='cluster-'.format(random.randint(0,50000)), redis_port=6380)) + clipper_conn = ClipperConnection( + DockerContainerManager( + cluster_name='cluster-'.format(random.randint(0, 50000)), + redis_port=6380)) clipper_conn.start_clipper() python_deployer.create_endpoint( clipper_conn, "simple-example", "doubles", feature_sum, num_replicas=2) diff --git a/integration-tests/clipper_metric_kube.py b/integration-tests/clipper_metric_kube.py index 0b6ec4181..dc587d685 100644 --- a/integration-tests/clipper_metric_kube.py +++ b/integration-tests/clipper_metric_kube.py @@ -138,10 +138,10 @@ def check_target_health(metric_addr): if __name__ == "__main__": import random - cluster_name = 'cluster-{}'.format(random.randint(0,5000)) + cluster_name = 'cluster-{}'.format(random.randint(0, 5000)) try: - clipper_conn = create_kubernetes_connection(new_name=cluster_name, - cleanup=False, start_clipper=True) + clipper_conn = create_kubernetes_connection( + new_name=cluster_name, cleanup=False, start_clipper=True) time.sleep(60) logger.info(clipper_conn.cm.get_query_addr()) try: @@ -198,20 +198,29 @@ def check_target_health(metric_addr): log_clipper_state(clipper_conn) logger.info("SUCCESS") create_kubernetes_connection( - cleanup=True, start_clipper=False, connect=False, cleanup_name=cluster_name) + cleanup=True, + start_clipper=False, + connect=False, + cleanup_name=cluster_name) logger.info("EXITING") os._exit(0) except BenchmarkException as e: log_clipper_state(clipper_conn) logger.exception("BenchmarkException") create_kubernetes_connection( - cleanup=True, start_clipper=False, connect=False, cleanup_name=cluster_name) + cleanup=True, + start_clipper=False, + connect=False, + cleanup_name=cluster_name) sys.exit(1) except ClipperException as e: log_clipper_state(clipper_conn) logger.exception("ClipperException") create_kubernetes_connection( - cleanup=True, start_clipper=False, connect=False, cleanup_name=cluster_name) + cleanup=True, + start_clipper=False, + connect=False, + cleanup_name=cluster_name) sys.exit(1) except Exception as e: logger.exception("Exception: {}".format(e)) diff --git a/integration-tests/deploy_mxnet_models.py b/integration-tests/deploy_mxnet_models.py index 656839be3..bf1a8d993 100644 --- a/integration-tests/deploy_mxnet_models.py +++ b/integration-tests/deploy_mxnet_models.py @@ -94,8 +94,8 @@ def get_test_point(): import random cluster_name = "cluster-{}".format(random.randint(0, 5000)) try: - clipper_conn = create_docker_connection(new_name=cluster_name, - cleanup=False, start_clipper=True) + clipper_conn = create_docker_connection( + new_name=cluster_name, cleanup=False, start_clipper=True) train_path = os.path.join(cur_dir, "data/train.data") data_iter = mx.io.CSVIter( diff --git a/integration-tests/kubernetes_integration_test.py b/integration-tests/kubernetes_integration_test.py index b5772c468..744d002d9 100644 --- a/integration-tests/kubernetes_integration_test.py +++ b/integration-tests/kubernetes_integration_test.py @@ -152,14 +152,21 @@ def test_kubernetes(clipper_conn, num_apps, num_models): cluster_name = "cluster-{}".format(random.randint(0, 5000)) clipper_conn = create_kubernetes_connection( - cleanup=False, start_clipper=True, with_proxy=False, new_name=cluster_name) + cleanup=False, + start_clipper=True, + with_proxy=False, + new_name=cluster_name) test_kubernetes(clipper_conn, num_apps, num_models) clipper_conn.stop_all() # Test with proxy. Assumes proxy is running at 127.0.0.1:8080 - proxy_name = "cluster-{}".format(random.randint(0,5000)) + proxy_name = "cluster-{}".format(random.randint(0, 5000)) clipper_conn = create_kubernetes_connection( - cleanup=True, start_clipper=True, with_proxy=True, cleanup_name=cluster_name, new_name=proxy_name) + cleanup=True, + start_clipper=True, + with_proxy=True, + cleanup_name=cluster_name, + new_name=proxy_name) test_kubernetes(clipper_conn, 1, 1) clipper_conn.stop_all() diff --git a/integration-tests/kubernetes_multi_frontend.py b/integration-tests/kubernetes_multi_frontend.py index e832ffacb..caac694a1 100644 --- a/integration-tests/kubernetes_multi_frontend.py +++ b/integration-tests/kubernetes_multi_frontend.py @@ -105,7 +105,10 @@ def create_and_test_app(clipper_conn, name): cluster_name = "cluster-{}".format(random.randint(0, 5000)) try: clipper_conn = create_kubernetes_connection( - cleanup=False, start_clipper=True, num_frontend_replicas=2, new_name=cluster_name) + cleanup=False, + start_clipper=True, + num_frontend_replicas=2, + new_name=cluster_name) time.sleep(10) print(clipper_conn.cm.get_query_addr()) try: @@ -159,12 +162,14 @@ def create_and_test_app(clipper_conn, name): except BenchmarkException as e: log_clipper_state(clipper_conn) logger.exception("BenchmarkException") - create_kubernetes_connection(cleanup=True, start_clipper=False, cleanup_name=cluster_name) + create_kubernetes_connection( + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) except ClipperException as e: log_clipper_state(clipper_conn) logger.exception("ClipperException") - create_kubernetes_connection(cleanup=True, start_clipper=False, cleanup_name=cluster_name) + create_kubernetes_connection( + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) except Exception as e: logger.exception("Exception: {}".format(e)) diff --git a/integration-tests/many_apps_many_models.py b/integration-tests/many_apps_many_models.py index ab7b8d331..332806f58 100644 --- a/integration-tests/many_apps_many_models.py +++ b/integration-tests/many_apps_many_models.py @@ -130,11 +130,14 @@ def create_and_test_app(clipper_conn, name, num_models): except BenchmarkException as e: log_clipper_state(clipper_conn) logger.exception("BenchmarkException") - create_docker_connection(cleanup=True, start_clipper=False, cleanup_name=cluster_name) + create_docker_connection( + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) else: - create_docker_connection(cleanup=True, start_clipper=False, cleanup_name=cluster_name) + create_docker_connection( + cleanup=True, start_clipper=False, cleanup_name=cluster_name) except Exception as e: logger.exception("Exception") - create_docker_connection(cleanup=True, start_clipper=False, cleanup_name=cluster_name) + create_docker_connection( + cleanup=True, start_clipper=False, cleanup_name=cluster_name) sys.exit(1) diff --git a/integration-tests/test_utils.py b/integration-tests/test_utils.py index 32f2a31cb..2e659ddc8 100644 --- a/integration-tests/test_utils.py +++ b/integration-tests/test_utils.py @@ -65,8 +65,7 @@ def find_unbound_port(): def create_docker_connection(cleanup=True, start_clipper=True, cleanup_name='default-cluster', - new_name='default-cluster' - ): + new_name='default-cluster'): logger.info("Creating DockerContainerManager") cl = None assert cleanup or start_clipper, "You must set at least one of {cleanup, start_clipper} to be true." @@ -129,14 +128,18 @@ def create_kubernetes_connection(cleanup=True, if cleanup: logger.info("Cleaning up Kubernetes Cluster {}".format(cleanup_name)) - cm = KubernetesContainerManager(cluster_name=cleanup_name, kubernetes_proxy_addr=kubernetes_proxy_addr) + cm = KubernetesContainerManager( + cluster_name=cleanup_name, + kubernetes_proxy_addr=kubernetes_proxy_addr) cl = ClipperConnection(cm) cl.stop_all() logger.info("Done cleaning up clipper") if start_clipper: logger.info("Starting up Kubernetes Cluster {}".format(new_name)) - cm = KubernetesContainerManager(cluster_name=cleanup_name, kubernetes_proxy_addr=kubernetes_proxy_addr) + cm = KubernetesContainerManager( + cluster_name=cleanup_name, + kubernetes_proxy_addr=kubernetes_proxy_addr) cl = ClipperConnection(cm) cl.start_clipper( query_frontend_image= @@ -149,7 +152,9 @@ def create_kubernetes_connection(cleanup=True, if connect: try: - cm = KubernetesContainerManager(cluster_name=connect_name, kubernetes_proxy_addr=kubernetes_proxy_addr) + cm = KubernetesContainerManager( + cluster_name=connect_name, + kubernetes_proxy_addr=kubernetes_proxy_addr) cl = ClipperConnection(cm) cl.connect() except Exception: From e10ff2ef95aaa4043df84c67cccc8ff0d664393e Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Fri, 1 Jun 2018 13:45:15 -0700 Subject: [PATCH 22/63] Add logging support; push to test on ubuntu --- bin/run_ci.sh | 3 + clipper_admin/clipper_admin/clipper_admin.py | 106 +++++++++--------- .../clipper_admin/container_manager.py | 15 +++ .../docker/docker_container_manager.py | 18 +-- .../kubernetes_container_manager.py | 29 ++--- integration-tests/clipper_metric_docker.py | 10 +- integration-tests/test_utils.py | 2 +- 7 files changed, 103 insertions(+), 80 deletions(-) diff --git a/bin/run_ci.sh b/bin/run_ci.sh index 6af56f434..2980d2b77 100755 --- a/bin/run_ci.sh +++ b/bin/run_ci.sh @@ -7,6 +7,9 @@ set -o pipefail # Printout for timeout debug date +echo "Simon: Failure Intended. Jenkins don't test this" +exit 1 + unset CDPATH # one-liner from http://stackoverflow.com/a/246128 # Determines absolute path of the directory containing diff --git a/clipper_admin/clipper_admin/clipper_admin.py b/clipper_admin/clipper_admin/clipper_admin.py index 5435c7c40..567b89670 100644 --- a/clipper_admin/clipper_admin/clipper_admin.py +++ b/clipper_admin/clipper_admin/clipper_admin.py @@ -24,7 +24,7 @@ from io import BytesIO as StringIO PY3 = True -from .container_manager import CONTAINERLESS_MODEL_IMAGE +from .container_manager import CONTAINERLESS_MODEL_IMAGE, ClusterAdapter from .exceptions import ClipperException, UnconnectedException from .version import __version__ @@ -79,6 +79,8 @@ def __init__(self, container_manager): self.connected = False self.cm = container_manager + self.logger = ClusterAdapter(logger, {'cluster_name': self.cm.cluster_name}) + def start_clipper( self, query_frontend_image='clipper/query_frontend:{}'.format( @@ -121,12 +123,12 @@ def start_clipper( raise RequestException break except RequestException: - logger.info("Clipper still initializing.") + self.logger.info("Clipper still initializing.") time.sleep(1) - logger.info("Clipper is running") + self.logger.info("Clipper is running") self.connected = True except ClipperException as e: - logger.warning("Error starting Clipper: {}".format(e.msg)) + self.logger.warning("Error starting Clipper: {}".format(e.msg)) raise e def connect(self): @@ -134,7 +136,7 @@ def connect(self): self.cm.connect() self.connected = True - logger.info("Successfully connected to Clipper cluster at {}".format( + self.logger.info("Successfully connected to Clipper cluster at {}".format( self.cm.get_query_addr())) def register_application(self, name, input_type, default_output, @@ -190,14 +192,14 @@ def register_application(self, name, input_type, default_output, }) headers = {'Content-type': 'application/json'} r = requests.post(url, headers=headers, data=req_json) - logger.debug(r.text) + self.logger.debug(r.text) if r.status_code != requests.codes.ok: msg = "Received error status code: {code} and message: {msg}".format( code=r.status_code, msg=r.text) - logger.error(msg) + self.logger.error(msg) raise ClipperException(msg) else: - logger.info("Application {app} was successfully registered".format( + self.logger.info("Application {app} was successfully registered".format( app=name)) def delete_application(self, name): @@ -209,14 +211,14 @@ def delete_application(self, name): req_json = json.dumps({"name": name}) headers = {"Content-type": "application/json"} r = requests.post(url, headers=headers, data=req_json) - logger.debug(r.text) + self.logger.debug(r.text) if r.status_code != requests.codes.ok: msg = "Received error status code: {code} and message: {msg}".format( code=r.status_code, msg=r.text) - logger.error(msg) + self.logger.error(msg) raise ClipperException(msg) else: - logger.info( + self.logger.info( "Application {app} was successfully deleted".format(app=name)) def link_model_to_app(self, app_name, model_name): @@ -251,14 +253,14 @@ def link_model_to_app(self, app_name, model_name): }) headers = {'Content-type': 'application/json'} r = requests.post(url, headers=headers, data=req_json) - logger.debug(r.text) + self.logger.debug(r.text) if r.status_code != requests.codes.ok: msg = "Received error status code: {code} and message: {msg}".format( code=r.status_code, msg=r.text) - logger.error(msg) + self.logger.error(msg) raise ClipperException(msg) else: - logger.info( + self.logger.info( "Model {model} is now linked to application {app}".format( model=model_name, app=app_name)) @@ -447,17 +449,17 @@ def build_model(self, image = "{reg}/{image}".format( reg=container_registry, image=image) docker_client = docker.from_env() - logger.info( + self.logger.info( "Building model Docker image with model data from {}".format( model_data_path)) image_result, build_logs = docker_client.images.build( fileobj=context_file, custom_context=True, tag=image) for b in build_logs: - logger.info(b) + self.logger.info(b) - logger.info("Pushing model Docker image to {}".format(image)) + self.logger.info("Pushing model Docker image to {}".format(image)) for line in docker_client.images.push(repository=image, stream=True): - logger.debug(line) + self.logger.debug(line) return image def deploy_model(self, @@ -549,7 +551,7 @@ def deploy_model(self, image=image, labels=labels, batch_size=batch_size) - logger.info("Done deploying model {name}:{version}.".format( + self.logger.info("Done deploying model {name}:{version}.".format( name=name, version=version)) def register_model(self, @@ -621,16 +623,16 @@ def register_model(self, }) headers = {'Content-type': 'application/json'} - logger.debug(req_json) + self.logger.debug(req_json) r = requests.post(url, headers=headers, data=req_json) - logger.debug(r.text) + self.logger.debug(r.text) if r.status_code != requests.codes.ok: msg = "Received error status code: {code} and message: {msg}".format( code=r.status_code, msg=r.text) - logger.error(msg) + self.logger.error(msg) raise ClipperException(msg) else: - logger.info( + self.logger.info( "Successfully registered model {name}:{version}".format( name=name, version=version)) @@ -734,12 +736,12 @@ def set_num_replicas(self, name, num_replicas, version=None): msg = ("Cannot resize the replica set for containerless model " "{name}:{version}").format( name=name, version=version) - logger.error(msg) + self.logger.error(msg) raise ClipperException(msg) else: msg = "Cannot add container for non-registered model {name}:{version}".format( name=name, version=version) - logger.error(msg) + self.logger.error(msg) raise ClipperException(msg) def get_all_apps(self, verbose=False): @@ -772,14 +774,14 @@ def get_all_apps(self, verbose=False): req_json = json.dumps({"verbose": verbose}) headers = {'Content-type': 'application/json'} r = requests.post(url, headers=headers, data=req_json) - logger.debug(r.text) + self.logger.debug(r.text) if r.status_code == requests.codes.ok: return r.json() else: msg = "Received error status code: {code} and message: {msg}".format( code=r.status_code, msg=r.text) - logger.error(msg) + self.logger.error(msg) raise ClipperException(msg) def get_app_info(self, name): @@ -810,12 +812,12 @@ def get_app_info(self, name): req_json = json.dumps({"name": name}) headers = {'Content-type': 'application/json'} r = requests.post(url, headers=headers, data=req_json) - logger.debug(r.text) + self.logger.debug(r.text) if r.status_code == requests.codes.ok: app_info = r.json() if len(app_info) == 0: - logger.warning( + self.logger.warning( "Application {} is not registered with Clipper".format( name)) return None @@ -823,7 +825,7 @@ def get_app_info(self, name): else: msg = "Received error status code: {code} and message: {msg}".format( code=r.status_code, msg=r.text) - logger.error(msg) + self.logger.error(msg) raise ClipperException(msg) def get_linked_models(self, app_name): @@ -853,13 +855,13 @@ def get_linked_models(self, app_name): req_json = json.dumps({"app_name": app_name}) headers = {'Content-type': 'application/json'} r = requests.post(url, headers=headers, data=req_json) - logger.debug(r.text) + self.logger.debug(r.text) if r.status_code == requests.codes.ok: return r.json() else: msg = "Received error status code: {code} and message: {msg}".format( code=r.status_code, msg=r.text) - logger.error(msg) + self.logger.error(msg) raise ClipperException(msg) def get_all_models(self, verbose=False): @@ -889,14 +891,14 @@ def get_all_models(self, verbose=False): req_json = json.dumps({"verbose": verbose}) headers = {'Content-type': 'application/json'} r = requests.post(url, headers=headers, data=req_json) - logger.debug(r.text) + self.logger.debug(r.text) if r.status_code == requests.codes.ok: return r.json() else: msg = "Received error status code: {code} and message: {msg}".format( code=r.status_code, msg=r.text) - logger.error(msg) + self.logger.error(msg) raise ClipperException(msg) def get_model_info(self, name, version): @@ -929,12 +931,12 @@ def get_model_info(self, name, version): req_json = json.dumps({"model_name": name, "model_version": version}) headers = {'Content-type': 'application/json'} r = requests.post(url, headers=headers, data=req_json) - logger.debug(r.text) + self.logger.debug(r.text) if r.status_code == requests.codes.ok: model_info = r.json() if len(model_info) == 0: - logger.warning( + self.logger.warning( "Model {name}:{version} is not registered with Clipper.". format(name=name, version=version)) return None @@ -942,7 +944,7 @@ def get_model_info(self, name, version): else: msg = "Received error status code: {code} and message: {msg}".format( code=r.status_code, msg=r.text) - logger.error(msg) + self.logger.error(msg) raise ClipperException(msg) def get_all_model_replicas(self, verbose=False): @@ -972,13 +974,13 @@ def get_all_model_replicas(self, verbose=False): req_json = json.dumps({"verbose": verbose}) headers = {'Content-type': 'application/json'} r = requests.post(url, headers=headers, data=req_json) - logger.debug(r.text) + self.logger.debug(r.text) if r.status_code == requests.codes.ok: return r.json() else: msg = "Received error status code: {code} and message: {msg}".format( code=r.status_code, msg=r.text) - logger.error(msg) + self.logger.error(msg) raise ClipperException(msg) def get_model_replica_info(self, name, version, replica_id): @@ -1016,12 +1018,12 @@ def get_model_replica_info(self, name, version, replica_id): }) headers = {'Content-type': 'application/json'} r = requests.post(url, headers=headers, data=req_json) - logger.debug(r.text) + self.logger.debug(r.text) if r.status_code == requests.codes.ok: model_rep_info = r.json() if len(model_rep_info) == 0: - logger.warning( + self.logger.warning( "No model replica with ID {rep_id} found for model {name}:{version}". format(rep_id=replica_id, name=name, version=version)) return None @@ -1029,7 +1031,7 @@ def get_model_replica_info(self, name, version, replica_id): else: msg = "Received error status code: {code} and message: {msg}".format( code=r.status_code, msg=r.text) - logger.error(msg) + self.logger.error(msg) raise ClipperException(msg) def get_clipper_logs(self, logging_dir="clipper_logs/"): @@ -1069,13 +1071,13 @@ def inspect_instance(self): raise UnconnectedException() url = "http://{host}/metrics".format(host=self.cm.get_query_addr()) r = requests.get(url) - logger.debug(r.text) + self.logger.debug(r.text) if r.status_code == requests.codes.ok: return r.json() else: msg = "Received error status code: {code} and message: {msg}".format( code=r.status_code, msg=r.text) - logger.error(msg) + self.logger.error(msg) raise ClipperException(msg) def set_model_version(self, name, version, num_replicas=None): @@ -1114,11 +1116,11 @@ def set_model_version(self, name, version, num_replicas=None): req_json = json.dumps({"model_name": name, "model_version": version}) headers = {'Content-type': 'application/json'} r = requests.post(url, headers=headers, data=req_json) - logger.debug(r.text) + self.logger.debug(r.text) if r.status_code != requests.codes.ok: msg = "Received error status code: {code} and message: {msg}".format( code=r.status_code, msg=r.text) - logger.error(msg) + self.logger.error(msg) raise ClipperException(msg) if num_replicas is not None: @@ -1171,7 +1173,7 @@ def stop_models(self, model_names): model_dict[m["model_name"]] = [m["model_version"]] self.cm.stop_models(model_dict) pp = pprint.PrettyPrinter(indent=4) - logger.info( + self.logger.info( "Stopped all containers for these models and versions:\n{}".format( pp.pformat(model_dict))) @@ -1197,7 +1199,7 @@ def stop_versioned_models(self, model_versions_dict): raise UnconnectedException() self.cm.stop_models(model_versions_dict) pp = pprint.PrettyPrinter(indent=4) - logger.info( + self.logger.info( "Stopped all containers for these models and versions:\n{}".format( pp.pformat(model_versions_dict))) @@ -1233,7 +1235,7 @@ def stop_inactive_model_versions(self, model_names): model_dict[m["model_name"]] = [m["model_version"]] self.cm.stop_models(model_dict) pp = pprint.PrettyPrinter(indent=4) - logger.info( + self.logger.info( "Stopped all containers for these models and versions:\n{}".format( pp.pformat(model_dict))) @@ -1245,7 +1247,7 @@ def stop_all_model_containers(self): ``connect`` first. """ self.cm.stop_all_model_containers() - logger.info("Stopped all Clipper model containers") + self.logger.info("Stopped all Clipper model containers") def stop_all(self, graceful=True): """Stops all processes that were started via Clipper admin commands. @@ -1258,7 +1260,7 @@ def stop_all(self, graceful=True): will take not effect in Kubernetes. """ self.cm.stop_all(graceful=graceful) - logger.info("Stopped all Clipper cluster and all model containers") + self.logger.info("Stopped all Clipper cluster and all model containers") def test_predict_function(self, query, func, input_type): """Tests that the user's function has the correct signature and can be properly saved and @@ -1333,7 +1335,7 @@ def test_predict_function(self, query, func, input_type): try: assert reloaded_func except AssertionError: - logger.error("Function does not properly serialize and reload") + self.logger.error("Function does not properly serialize and reload") return "Function does not properly serialize and reload" return reloaded_func(numpy_data) diff --git a/clipper_admin/clipper_admin/container_manager.py b/clipper_admin/clipper_admin/container_manager.py index b745d92cc..729859d2c 100644 --- a/clipper_admin/clipper_admin/container_manager.py +++ b/clipper_admin/clipper_admin/container_manager.py @@ -2,6 +2,7 @@ from .exceptions import ClipperException import random import socket +import logging # Constants CLIPPER_INTERNAL_QUERY_PORT = 1337 @@ -34,6 +35,20 @@ _MODEL_CONTAINER_LABEL_DELIMITER = "_" +class ClusterAdapter(logging.LoggerAdapter): + """ + This adapter adds cluster name to logging format. + + Usage + ----- + In ContainerManager init process, do: + self.logger = ClusterAdapter(logger, {'cluster_name': self.cluster_name}) + """ + def process(self, msg, kwargs): + return "[{}] {}".format(self.extra['cluster_name'], msg), kwargs + + + def create_model_container_label(name, version): return "{name}{delim}{version}".format( name=name, delim=_MODEL_CONTAINER_LABEL_DELIMITER, version=version) diff --git a/clipper_admin/clipper_admin/docker/docker_container_manager.py b/clipper_admin/clipper_admin/docker/docker_container_manager.py index ae72bbfdc..882d2e44c 100644 --- a/clipper_admin/clipper_admin/docker/docker_container_manager.py +++ b/clipper_admin/clipper_admin/docker/docker_container_manager.py @@ -14,14 +14,14 @@ CLIPPER_MGMT_FRONTEND_CONTAINER_LABEL, CLIPPER_INTERNAL_RPC_PORT, CLIPPER_INTERNAL_QUERY_PORT, CLIPPER_INTERNAL_MANAGEMENT_PORT, CLIPPER_INTERNAL_METRIC_PORT, CLIPPER_INTERNAL_REDIS_PORT, - CLIPPER_DOCKER_PORT_LABELS, CLIPPER_METRIC_CONFIG_LABEL) + CLIPPER_DOCKER_PORT_LABELS, CLIPPER_METRIC_CONFIG_LABEL, + ClusterAdapter) from ..exceptions import ClipperException from requests.exceptions import ConnectionError from .docker_metric_utils import * logger = logging.getLogger(__name__) - class DockerContainerManager(ContainerManager): def __init__(self, cluster_name="default-cluster", @@ -102,6 +102,8 @@ def __init__(self, self.extra_container_kwargs.update(container_args) + self.logger = ClusterAdapter(logger, {'cluster_name': self.cluster_name}) + def start_clipper(self, query_frontend_image, mgmt_frontend_image, @@ -119,7 +121,7 @@ def start_clipper(self, self.docker_client.networks.create( self.docker_network, check_duplicate=True) except docker.errors.APIError: - logger.debug( + self.logger.debug( "{nw} network already exists".format(nw=self.docker_network)) except ConnectionError: msg = "Unable to Connect to Docker. Please Check if Docker is running." @@ -137,7 +139,7 @@ def start_clipper(self, self.cluster_name)) if not self.external_redis: - logger.info("Starting managed Redis instance in Docker") + self.logger.info("Starting managed Redis instance in Docker") self.redis_port = find_unbound_port(self.redis_port) redis_labels = self.common_labels.copy() redis_labels[CLIPPER_DOCKER_PORT_LABELS['redis']] = str( @@ -289,7 +291,7 @@ def _add_replica(self, name, version, input_type, image): ] }) if len(containers) < 1: - logger.warning("No Clipper query frontend found.") + self.logger.warning("No Clipper query frontend found.") raise ClipperException( "No Clipper query frontend to attach model container to") query_frontend_hostname = containers[0].name @@ -328,7 +330,7 @@ def set_num_replicas(self, name, version, input_type, image, num_replicas): current_replicas = self._get_replicas(name, version) if len(current_replicas) < num_replicas: num_missing = num_replicas - len(current_replicas) - logger.info( + self.logger.info( "Found {cur} replicas for {name}:{version}. Adding {missing}". format( cur=len(current_replicas), @@ -349,7 +351,7 @@ def set_num_replicas(self, name, version, input_type, image, num_replicas): elif len(current_replicas) > num_replicas: num_extra = len(current_replicas) - num_replicas - logger.info( + self.logger.info( "Found {cur} replicas for {name}:{version}. Removing {extra}". format( cur=len(current_replicas), @@ -376,7 +378,7 @@ def get_logs(self, logging_dir): log_files = [] if not os.path.exists(logging_dir): os.makedirs(logging_dir) - logger.info("Created logging directory: %s" % logging_dir) + self.logger.info("Created logging directory: %s" % logging_dir) for c in containers: log_file_name = "image_{image}:container_{id}.log".format( image=c.image.short_id, id=c.short_id) diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index 648670b28..bf541b450 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -3,7 +3,8 @@ create_model_container_label, ContainerManager, CLIPPER_DOCKER_LABEL, CLIPPER_MODEL_CONTAINER_LABEL, CLIPPER_QUERY_FRONTEND_ID_LABEL, CLIPPER_INTERNAL_MANAGEMENT_PORT, CLIPPER_INTERNAL_QUERY_PORT, - CLIPPER_INTERNAL_METRIC_PORT, CLIPPER_NAME_LABEL) + CLIPPER_INTERNAL_METRIC_PORT, CLIPPER_NAME_LABEL, + ClusterAdapter) from ..exceptions import ClipperException from .kubernetes_metric_utils import PROM_VERSION, CLIPPER_FRONTEND_EXPORTER_IMAGE @@ -127,6 +128,8 @@ def __init__(self, loader=jinja2.FileSystemLoader(cur_dir, followlinks=True), undefined=jinja2.StrictUndefined) + self.logger = ClusterAdapter(logger, {'cluster_name': self.cluster_name}) + def start_clipper(self, query_frontend_image, mgmt_frontend_image, @@ -254,18 +257,18 @@ def connect(self): if len(external_node_hosts) == 0 and (self.useInternalIP): msg = "No external node addresses found. Using Internal IP address" - logger.warn(msg) + self.logger.warn(msg) for addr in node.status.addresses: if addr.type == "InternalIP": external_node_hosts.append(addr.address) if len(external_node_hosts) == 0: msg = "Error connecting to Kubernetes cluster. No external node addresses found" - logger.error(msg) + self.logger.error(msg) raise ClipperException(msg) self.external_node_hosts = external_node_hosts - logger.info("Found {num_nodes} nodes: {nodes}".format( + self.logger.info("Found {num_nodes} nodes: {nodes}".format( num_nodes=len(external_node_hosts), nodes=", ".join(external_node_hosts))) @@ -277,7 +280,7 @@ def connect(self): for p in mgmt_frontend_ports: if p.name == "1338": self.clipper_management_port = p.node_port - logger.info("Setting Clipper mgmt port to {}".format( + self.logger.info("Setting Clipper mgmt port to {}".format( self.clipper_management_port)) query_frontend_ports = self._k8s_v1.read_namespaced_service( @@ -287,7 +290,7 @@ def connect(self): for p in query_frontend_ports: if p.name == "1337": self.clipper_query_port = p.node_port - logger.info("Setting Clipper query port to {}".format( + self.logger.info("Setting Clipper query port to {}".format( self.clipper_query_port)) elif p.name == "7000": self.clipper_rpc_port = p.node_port @@ -309,7 +312,7 @@ def connect(self): for p in metrics_ports: if p.name == "9090": self.clipper_metric_port = p.node_port - logger.info("Setting Clipper metric port to {}".format( + self.logger.info("Setting Clipper metric port to {}".format( self.clipper_metric_port)) except ApiException as e: @@ -380,7 +383,7 @@ def get_logs(self, logging_dir): log_files = [] if not os.path.exists(logging_dir): os.makedirs(logging_dir) - logger.info("Created logging directory: %s" % logging_dir) + self.logger.info("Created logging directory: %s" % logging_dir) for pod in self._k8s_v1.list_namespaced_pod( namespace='default', @@ -391,8 +394,8 @@ def get_logs(self, logging_dir): log_file_name = "{pod}_{num}.log".format( pod=pod.metadata.name, num=str(i)) log_file_alt = "{cname}.log".format(cname=c.name) - logger.info("log file name: {}".format(log_file_name)) - logger.info("log alt file name: {}".format(log_file_alt)) + self.logger.info("log file name: {}".format(log_file_name)) + self.logger.info("log alt file name: {}".format(log_file_alt)) log_file = os.path.join(logging_dir, log_file_name) with open(log_file, "w") as lf: lf.write( @@ -418,7 +421,7 @@ def stop_models(self, models): cluster_label=CLIPPER_DOCKER_LABEL, cluster_name=self.cluster_name)) except ApiException as e: - logger.warn( + self.logger.warn( "Exception deleting kubernetes deployments: {}".format(e)) raise e @@ -432,12 +435,12 @@ def stop_all_model_containers(self): cluster_label=CLIPPER_DOCKER_LABEL, cluster_name=self.cluster_name)) except ApiException as e: - logger.warn( + self.logger.warn( "Exception deleting kubernetes deployments: {}".format(e)) raise e def stop_all(self, graceful=True): - logger.info("Stopping all running Clipper resources") + self.logger.info("Stopping all running Clipper resources") cluster_selecter = "{cluster_label}={cluster_name}".format( cluster_label=CLIPPER_DOCKER_LABEL, cluster_name=self.cluster_name) diff --git a/integration-tests/clipper_metric_docker.py b/integration-tests/clipper_metric_docker.py index 9a2009f45..4f0fbea12 100644 --- a/integration-tests/clipper_metric_docker.py +++ b/integration-tests/clipper_metric_docker.py @@ -11,7 +11,7 @@ import numpy as np import requests import yaml -from test_utils import (log_clipper_state) +from test_utils import log_clipper_state, create_docker_connection cur_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.abspath("%s/../clipper_admin" % cur_dir)) @@ -86,10 +86,8 @@ def log_docker_ps(clipper_conn): logger = logging.getLogger(__name__) logger.info("Start Metric Test (0/1): Running 2 Replicas") - clipper_conn = ClipperConnection( - DockerContainerManager( - cluster_name='cluster-'.format(random.randint(0, 50000)), - redis_port=6380)) + cluster_name = "cluster-{}".format(random.randint(0,50000) + clipper_conn = create_docker_connection(cleanup=False, start_clipper=True, new_name=cluster_name)) clipper_conn.start_clipper() python_deployer.create_endpoint( clipper_conn, "simple-example", "doubles", feature_sum, num_replicas=2) @@ -120,7 +118,7 @@ def log_docker_ps(clipper_conn): logger.info("Test 2 Passed") logger.info("Metric Test Done, Cleaning up...") - clipper_conn.stop_all() + create_docker_connection(cleanup=True, cleanup_name=cluster_name) except Exception as e: log_docker_ps(clipper_conn) logger.error(e) diff --git a/integration-tests/test_utils.py b/integration-tests/test_utils.py index 2e659ddc8..58261c5a4 100644 --- a/integration-tests/test_utils.py +++ b/integration-tests/test_utils.py @@ -82,7 +82,7 @@ def create_docker_connection(cleanup=True, cl = ClipperConnection(cm) cl.stop_all(graceful=False) docker_client = get_docker_client() - docker_client.containers.prune(filters={"label": CLIPPER_DOCKER_LABEL}) + docker_client.containers.prune(filters={"label": "{key}={val}".format(key=CLIPPER_DOCKER_LABEL, val=cleanup_name)}) if start_clipper: # Try to start Clipper in a retry loop here to address flaky tests From 69c7f1bc7d8cd94e47358ce666635e427c51a78f Mon Sep 17 00:00:00 2001 From: simon-mo Date: Fri, 1 Jun 2018 23:01:08 +0000 Subject: [PATCH 23/63] Fix docker issue on ubuntu --- clipper_admin/clipper_admin/clipper_admin.py | 3 ++- .../clipper_admin/docker/docker_metric_utils.py | 1 + integration-tests/clipper_admin_tests.py | 17 ++++++++++++++--- integration-tests/clipper_metric_docker.py | 5 ++--- integration-tests/multi_tenency_test.py | 2 +- 5 files changed, 20 insertions(+), 8 deletions(-) diff --git a/clipper_admin/clipper_admin/clipper_admin.py b/clipper_admin/clipper_admin/clipper_admin.py index 567b89670..a2309c355 100644 --- a/clipper_admin/clipper_admin/clipper_admin.py +++ b/clipper_admin/clipper_admin/clipper_admin.py @@ -455,7 +455,8 @@ def build_model(self, image_result, build_logs = docker_client.images.build( fileobj=context_file, custom_context=True, tag=image) for b in build_logs: - self.logger.info(b) + if 'stream' in b and b['stream'] != '\n': #log build steps only + self.logger.info(b['stream'].rstrip()) self.logger.info("Pushing model Docker image to {}".format(image)) for line in docker_client.images.push(repository=image, stream=True): diff --git a/clipper_admin/clipper_admin/docker/docker_metric_utils.py b/clipper_admin/clipper_admin/docker/docker_metric_utils.py index b83817088..7a4642539 100644 --- a/clipper_admin/clipper_admin/docker/docker_metric_utils.py +++ b/clipper_admin/clipper_admin/docker/docker_metric_utils.py @@ -102,6 +102,7 @@ def run_metric_image(docker_client, common_labels, prometheus_port, 'mode': 'ro' } }, + user='root', # prom use nobody by default but it can't access config. labels=metric_labels, **extra_container_kwargs) diff --git a/integration-tests/clipper_admin_tests.py b/integration-tests/clipper_admin_tests.py index 2a725baeb..0135ffb43 100644 --- a/integration-tests/clipper_admin_tests.py +++ b/integration-tests/clipper_admin_tests.py @@ -27,6 +27,7 @@ from clipper_admin.deployers.python import create_endpoint as create_py_endpoint from clipper_admin.deployers.python import deploy_python_closure from clipper_admin import __version__ as clipper_version +from clipper_admin.container_manager import CLIPPER_DOCKER_LABEL logging.basicConfig( format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', @@ -378,7 +379,10 @@ def predict_func(inputs): filters={ "ancestor": "clipper/python-closure-container:{}".format( - clipper_version) + clipper_version), + "label" : "{key}={val}".format( + key=CLIPPER_DOCKER_LABEL, + val=self.clipper_conn.cm.cluster_name) }) elif py_minor_version == (3, 5): @@ -386,14 +390,21 @@ def predict_func(inputs): filters={ "ancestor": "clipper/python35-closure-container:{}".format( - clipper_version) + clipper_version), + "label" : "{key}={val}".format( + key=CLIPPER_DOCKER_LABEL, + val=self.clipper_conn.cm.cluster_name) + }) elif py_minor_version == (3, 6): containers = docker_client.containers.list( filters={ "ancestor": "clipper/python36-closure-container:{}".format( - clipper_version) + clipper_version), + "label" : "{key}={val}".format( + key=CLIPPER_DOCKER_LABEL, + val=self.clipper_conn.cm.cluster_name) }) else: msg = ( diff --git a/integration-tests/clipper_metric_docker.py b/integration-tests/clipper_metric_docker.py index 4f0fbea12..8b55576f4 100644 --- a/integration-tests/clipper_metric_docker.py +++ b/integration-tests/clipper_metric_docker.py @@ -86,9 +86,8 @@ def log_docker_ps(clipper_conn): logger = logging.getLogger(__name__) logger.info("Start Metric Test (0/1): Running 2 Replicas") - cluster_name = "cluster-{}".format(random.randint(0,50000) - clipper_conn = create_docker_connection(cleanup=False, start_clipper=True, new_name=cluster_name)) - clipper_conn.start_clipper() + cluster_name = "cluster-{}".format(random.randint(0,50000)) + clipper_conn = create_docker_connection(cleanup=False, start_clipper=True, new_name=cluster_name) python_deployer.create_endpoint( clipper_conn, "simple-example", "doubles", feature_sum, num_replicas=2) time.sleep(2) diff --git a/integration-tests/multi_tenency_test.py b/integration-tests/multi_tenency_test.py index 384a0364d..058512701 100644 --- a/integration-tests/multi_tenency_test.py +++ b/integration-tests/multi_tenency_test.py @@ -72,7 +72,7 @@ def predict_(addr, x, batch=False): if __name__ == '__main__': - if sys.argv[1] == '--kubernetes': + if len(sys.argv) > 1 and sys.argv[1] == '--kubernetes': test(kubernetes=True) else: test(kubernetes=False) From eabf4280804661d6f4abc197b8d066bc362e7b34 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Fri, 1 Jun 2018 16:04:16 -0700 Subject: [PATCH 24/63] Modify comment about prometheus --- clipper_admin/clipper_admin/docker/docker_metric_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clipper_admin/clipper_admin/docker/docker_metric_utils.py b/clipper_admin/clipper_admin/docker/docker_metric_utils.py index 7a4642539..47c2cde4d 100644 --- a/clipper_admin/clipper_admin/docker/docker_metric_utils.py +++ b/clipper_admin/clipper_admin/docker/docker_metric_utils.py @@ -82,7 +82,7 @@ def run_metric_image(docker_client, common_labels, prometheus_port, :return: None """ - # CMD comes from https://hub.docker.com/r/prom/prometheus/~/dockerfile/ + # CMD comes from https://github.com/prometheus/prometheus/blob/release-2.1/Dockerfile metric_cmd = [ "--config.file=/etc/prometheus/prometheus.yml", "--storage.tsdb.path=/prometheus", From 7b0346d1242024987250a955ff351646119b9063 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Fri, 1 Jun 2018 16:35:29 -0700 Subject: [PATCH 25/63] Fix some kubernetes test --- bin/run_ci.sh | 3 --- .../kubernetes_integration_test.py | 5 ++--- .../kubernetes_multi_frontend.py | 19 ++++++------------- integration-tests/multi_tenency_test.py | 4 ++-- integration-tests/test_utils.py | 12 +++++++----- 5 files changed, 17 insertions(+), 26 deletions(-) diff --git a/bin/run_ci.sh b/bin/run_ci.sh index 2980d2b77..6af56f434 100755 --- a/bin/run_ci.sh +++ b/bin/run_ci.sh @@ -7,9 +7,6 @@ set -o pipefail # Printout for timeout debug date -echo "Simon: Failure Intended. Jenkins don't test this" -exit 1 - unset CDPATH # one-liner from http://stackoverflow.com/a/246128 # Determines absolute path of the directory containing diff --git a/integration-tests/kubernetes_integration_test.py b/integration-tests/kubernetes_integration_test.py index 744d002d9..0816efc93 100644 --- a/integration-tests/kubernetes_integration_test.py +++ b/integration-tests/kubernetes_integration_test.py @@ -9,7 +9,7 @@ import time import logging from test_utils import (create_kubernetes_connection, BenchmarkException, - fake_model_data, headers, log_clipper_state) + fake_model_data, headers, log_clipper_state, CLIPPER_CONTAINER_REGISTRY) cur_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.abspath("%s/../clipper_admin" % cur_dir)) from clipper_admin import __version__ as clipper_version, CLIPPER_TEMP_DIR, ClipperException @@ -34,8 +34,7 @@ def deploy_model(clipper_conn, name, version, link=False): fake_model_data, "clipper/noop-container:{}".format(clipper_version), num_replicas=1, - container_registry= - "568959175238.dkr.ecr.us-west-1.amazonaws.com/clipper") + container_registry=CLIPPER_CONTAINER_REGISTRY) time.sleep(10) if link: diff --git a/integration-tests/kubernetes_multi_frontend.py b/integration-tests/kubernetes_multi_frontend.py index caac694a1..4b9d0829b 100644 --- a/integration-tests/kubernetes_multi_frontend.py +++ b/integration-tests/kubernetes_multi_frontend.py @@ -14,7 +14,7 @@ import logging import yaml from test_utils import (create_kubernetes_connection, BenchmarkException, - fake_model_data, headers, log_clipper_state) + fake_model_data, headers, log_clipper_state, CLIPPER_CONTAINER_REGISTRY) cur_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.abspath("%s/../clipper_admin" % cur_dir)) @@ -39,8 +39,7 @@ def deploy_model(clipper_conn, name, link=False): fake_model_data, "clipper/noop-container:{}".format(clipper_version), num_replicas=1, - container_registry= - "568959175238.dkr.ecr.us-west-1.amazonaws.com/clipper") + container_registry=CLIPPER_CONTAINER_REGISTRY) time.sleep(10) if link: @@ -118,7 +117,7 @@ def create_and_test_app(clipper_conn, name): k8s_beta = clipper_conn.cm._k8s_beta if (k8s_beta.read_namespaced_deployment( - 'query-frontend-0', namespace='default').to_dict() + 'query-frontend-0-at-{}'.format(cluster_name), namespace='default').to_dict() ['status']['available_replicas'] != 1): raise BenchmarkException( "Wrong number of replicas of query-frontend-0." @@ -128,7 +127,7 @@ def create_and_test_app(clipper_conn, name): 'query-frontend-0', namespace='default').to_dict()[ 'status']['available_replicas'])) if (k8s_beta.read_namespaced_deployment( - 'query-frontend-1', namespace='default').to_dict() + 'query-frontend-1-at-{}'.format(cluster_name), namespace='default').to_dict() ['status']['available_replicas'] != 1): raise BenchmarkException( "Wrong number of replicas of query-frontend-1." @@ -143,8 +142,8 @@ def create_and_test_app(clipper_conn, name): svc_lists = k8s_v1.list_namespaced_service( namespace='default').to_dict()['items'] svc_names = [svc['metadata']['name'] for svc in svc_lists] - if not ('query-frontend-0' in svc_names - and 'query-frontend-1' in svc_names): + if not ('query-frontend-0-at-{}'.format(cluster_name) in svc_names + and 'query-frontend-1-at-{}'.format(cluster_name) in svc_names): raise BenchmarkException( "Error creating query frontend RPC services") logger.info("Ok: we have 2 query-frontend rpc services") @@ -173,10 +172,4 @@ def create_and_test_app(clipper_conn, name): sys.exit(1) except Exception as e: logger.exception("Exception: {}".format(e)) - - # Added debug lines in case it fails - os.system("kubectl get pods") - os.system("kubectl describe pods") - # End Debug - sys.exit(1) diff --git a/integration-tests/multi_tenency_test.py b/integration-tests/multi_tenency_test.py index 058512701..23fb10cad 100644 --- a/integration-tests/multi_tenency_test.py +++ b/integration-tests/multi_tenency_test.py @@ -8,7 +8,7 @@ from datetime import datetime import os import time -from test_utils import create_kubernetes_connection, create_docker_connection +from test_utils import create_kubernetes_connection, create_docker_connection, CLIPPER_CONTAINER_REGISTRY def test(kubernetes): @@ -48,7 +48,7 @@ def deploy_(clipper_conn, use_kubernetes=False): "simple-example", "doubles", feature_sum, - registry="568959175238.dkr.ecr.us-west-1.amazonaws.com/clipper") + registry=CLIPPER_CONTAINER_REGISTRY) else: python_deployer.create_endpoint(clipper_conn, "simple-example", "doubles", feature_sum) diff --git a/integration-tests/test_utils.py b/integration-tests/test_utils.py index 58261c5a4..6829649a5 100644 --- a/integration-tests/test_utils.py +++ b/integration-tests/test_utils.py @@ -36,6 +36,8 @@ def __str__(self): # range of ports where available ports can be found PORT_RANGE = [34256, 50000] +CLIPPER_CONTAINER_REGISTRY = "568959175238.dkr.ecr.us-west-1.amazonaws.com/clipper" + def get_docker_client(): if "DOCKER_API_VERSION" in os.environ: @@ -109,9 +111,9 @@ def create_docker_connection(cleanup=True, return cl -def create_kubernetes_connection(cleanup=True, - start_clipper=True, - connect=True, +def create_kubernetes_connection(cleanup=False, + start_clipper=False, + connect=False, with_proxy=False, num_frontend_replicas=1, cleanup_name='default-cluster', @@ -119,7 +121,7 @@ def create_kubernetes_connection(cleanup=True, connect_name='default-cluster'): logger.info("Creating KubernetesContainerManager") cl = None - assert cleanup or start_clipper, "You must set at least one of {cleanup, start_clipper} to be true." + assert cleanup or start_clipper or connect, "You must set at least one of {cleanup, start_clipper, connect} to be true." if with_proxy: kubernetes_proxy_addr = "127.0.0.1:8080" @@ -138,7 +140,7 @@ def create_kubernetes_connection(cleanup=True, if start_clipper: logger.info("Starting up Kubernetes Cluster {}".format(new_name)) cm = KubernetesContainerManager( - cluster_name=cleanup_name, + cluster_name=new_name, kubernetes_proxy_addr=kubernetes_proxy_addr) cl = ClipperConnection(cm) cl.start_clipper( From ffeef95796de7917333b6712b6ebbc10479a039c Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Fri, 1 Jun 2018 16:41:38 -0700 Subject: [PATCH 26/63] Interpreable cluster name --- integration-tests/clipper_admin_tests.py | 8 +++++--- integration-tests/clipper_metric_docker.py | 3 ++- integration-tests/clipper_metric_kube.py | 2 +- integration-tests/deploy_mxnet_models.py | 2 +- integration-tests/deploy_pyspark_models.py | 2 +- integration-tests/deploy_pyspark_pipeline_models.py | 2 +- integration-tests/deploy_pyspark_sparkml_models.py | 2 +- integration-tests/deploy_pytorch_models.py | 2 +- integration-tests/deploy_pytorch_to_caffe2_with_onnx.py | 2 +- integration-tests/deploy_tensorflow_models.py | 2 +- integration-tests/deploy_xgboost_models.py | 2 +- integration-tests/kubernetes_integration_test.py | 4 ++-- integration-tests/kubernetes_multi_frontend.py | 2 +- integration-tests/many_apps_many_models.py | 2 +- integration-tests/multi_tenency_test.py | 4 ++-- 15 files changed, 22 insertions(+), 19 deletions(-) diff --git a/integration-tests/clipper_admin_tests.py b/integration-tests/clipper_admin_tests.py index 0135ffb43..099bc3596 100644 --- a/integration-tests/clipper_admin_tests.py +++ b/integration-tests/clipper_admin_tests.py @@ -39,7 +39,7 @@ class ClipperManagerTestCaseShort(unittest.TestCase): def setUp(self): - new_name = "cluster-{}".format(random.randint(0, 5000)) + new_name = "admin-test-cluster-{}".format(random.randint(0, 5000)) self.clipper_conn = create_docker_connection( cleanup=False, start_clipper=True, new_name=new_name) self.name = new_name @@ -579,10 +579,12 @@ def test_build_model_with_custom_packages(self): class ClipperManagerTestCaseLong(unittest.TestCase): + cluster_name = "admin-test-long-cluster-{}".format(random.randint(0,50000)) + @classmethod def setUpClass(self): self.clipper_conn = create_docker_connection( - cleanup=True, start_clipper=True) + cleanup=False, start_clipper=True, new_name=self.cluster_name) self.app_name_1 = "app3" self.app_name_2 = "app4" self.app_name_3 = "app5" @@ -622,7 +624,7 @@ def setUpClass(self): @classmethod def tearDownClass(self): self.clipper_conn = create_docker_connection( - cleanup=True, start_clipper=False) + cleanup=True, start_clipper=False, cleanup_name=self.cluster_name) def test_unlinked_app_returns_default_predictions(self): addr = self.clipper_conn.get_query_addr() diff --git a/integration-tests/clipper_metric_docker.py b/integration-tests/clipper_metric_docker.py index 8b55576f4..8a3908a75 100644 --- a/integration-tests/clipper_metric_docker.py +++ b/integration-tests/clipper_metric_docker.py @@ -86,7 +86,8 @@ def log_docker_ps(clipper_conn): logger = logging.getLogger(__name__) logger.info("Start Metric Test (0/1): Running 2 Replicas") - cluster_name = "cluster-{}".format(random.randint(0,50000)) + + cluster_name = "metric-docker-cluster-{}".format(random.randint(0,50000)) clipper_conn = create_docker_connection(cleanup=False, start_clipper=True, new_name=cluster_name) python_deployer.create_endpoint( clipper_conn, "simple-example", "doubles", feature_sum, num_replicas=2) diff --git a/integration-tests/clipper_metric_kube.py b/integration-tests/clipper_metric_kube.py index dc587d685..3d956ee9d 100644 --- a/integration-tests/clipper_metric_kube.py +++ b/integration-tests/clipper_metric_kube.py @@ -138,7 +138,7 @@ def check_target_health(metric_addr): if __name__ == "__main__": import random - cluster_name = 'cluster-{}'.format(random.randint(0, 5000)) + cluster_name = 'metric-k8s-cluster-{}'.format(random.randint(0, 5000)) try: clipper_conn = create_kubernetes_connection( new_name=cluster_name, cleanup=False, start_clipper=True) diff --git a/integration-tests/deploy_mxnet_models.py b/integration-tests/deploy_mxnet_models.py index bf1a8d993..9548d44e3 100644 --- a/integration-tests/deploy_mxnet_models.py +++ b/integration-tests/deploy_mxnet_models.py @@ -92,7 +92,7 @@ def get_test_point(): pos_label = 3 import random - cluster_name = "cluster-{}".format(random.randint(0, 5000)) + cluster_name = "mxnet-cluster-{}".format(random.randint(0, 5000)) try: clipper_conn = create_docker_connection( new_name=cluster_name, cleanup=False, start_clipper=True) diff --git a/integration-tests/deploy_pyspark_models.py b/integration-tests/deploy_pyspark_models.py index b11aee4a8..1a9878908 100644 --- a/integration-tests/deploy_pyspark_models.py +++ b/integration-tests/deploy_pyspark_models.py @@ -122,7 +122,7 @@ def get_test_point(): pos_label = 3 import random - cluster_name = "cluster-{}".format(random.randint(0, 5000)) + cluster_name = "pyspark-cluster-{}".format(random.randint(0, 5000)) try: spark = SparkSession\ diff --git a/integration-tests/deploy_pyspark_pipeline_models.py b/integration-tests/deploy_pyspark_pipeline_models.py index 35c6705a4..4ce0fe59a 100644 --- a/integration-tests/deploy_pyspark_pipeline_models.py +++ b/integration-tests/deploy_pyspark_pipeline_models.py @@ -56,7 +56,7 @@ def predict(spark, pipeline, xs): def run_test(): import random - cluster_name = "cluster-{}".format(random.randint(0, 5000)) + cluster_name = "pyspark-pipeline-cluster-{}".format(random.randint(0, 5000)) spark = SparkSession\ .builder\ diff --git a/integration-tests/deploy_pyspark_sparkml_models.py b/integration-tests/deploy_pyspark_sparkml_models.py index d54371184..b96256c51 100644 --- a/integration-tests/deploy_pyspark_sparkml_models.py +++ b/integration-tests/deploy_pyspark_sparkml_models.py @@ -117,7 +117,7 @@ def get_test_point(): pos_label = 3 import random - cluster_name = "cluster-{}".format(random.randint(0, 5000)) + cluster_name = "pyspark-ml-cluster-{}".format(random.randint(0, 5000)) try: spark = SparkSession\ .builder\ diff --git a/integration-tests/deploy_pytorch_models.py b/integration-tests/deploy_pytorch_models.py index 08a8be577..10c9e35db 100644 --- a/integration-tests/deploy_pytorch_models.py +++ b/integration-tests/deploy_pytorch_models.py @@ -161,7 +161,7 @@ def __getitem__(self, index): pos_label = 3 import random - cluster_name = "cluster-{}".format(random.randint(0, 5000)) + cluster_name = "pytorch-cluster-{}".format(random.randint(0, 5000)) try: clipper_conn = create_docker_connection( cleanup=False, start_clipper=True, new_name=cluster_name) diff --git a/integration-tests/deploy_pytorch_to_caffe2_with_onnx.py b/integration-tests/deploy_pytorch_to_caffe2_with_onnx.py index ed5445169..891a4c79a 100644 --- a/integration-tests/deploy_pytorch_to_caffe2_with_onnx.py +++ b/integration-tests/deploy_pytorch_to_caffe2_with_onnx.py @@ -169,7 +169,7 @@ def __getitem__(self, index): pos_label = 3 import random - cluster_name = "cluster-{}".format(random.randint(0, 5000)) + cluster_name = "onnx-cluster-{}".format(random.randint(0, 5000)) try: clipper_conn = create_docker_connection( cleanup=False, start_clipper=True, new_name=cluster_name) diff --git a/integration-tests/deploy_tensorflow_models.py b/integration-tests/deploy_tensorflow_models.py index 8cd35a27e..265b8b0dd 100644 --- a/integration-tests/deploy_tensorflow_models.py +++ b/integration-tests/deploy_tensorflow_models.py @@ -152,7 +152,7 @@ def get_test_point(): pos_label = 3 import random - cluster_name = "cluster-{}".format(random.randint(0, 5000)) + cluster_name = "tf-cluster-{}".format(random.randint(0, 5000)) try: sess = None clipper_conn = create_docker_connection( diff --git a/integration-tests/deploy_xgboost_models.py b/integration-tests/deploy_xgboost_models.py index ac1cc3da4..834b1236d 100644 --- a/integration-tests/deploy_xgboost_models.py +++ b/integration-tests/deploy_xgboost_models.py @@ -84,7 +84,7 @@ def get_test_point(): pos_label = 3 import random - cluster_name = "cluster-{}".format(random.randint(0, 5000)) + cluster_name = "xgboost-cluster-{}".format(random.randint(0, 5000)) try: clipper_conn = create_docker_connection( cleanup=False, start_clipper=True, new_name=cluster_name) diff --git a/integration-tests/kubernetes_integration_test.py b/integration-tests/kubernetes_integration_test.py index 0816efc93..0f5164ee4 100644 --- a/integration-tests/kubernetes_integration_test.py +++ b/integration-tests/kubernetes_integration_test.py @@ -148,7 +148,7 @@ def test_kubernetes(clipper_conn, num_apps, num_models): # Test without proxy first import random - cluster_name = "cluster-{}".format(random.randint(0, 5000)) + cluster_name = "k8s-test-cluster-{}".format(random.randint(0, 5000)) clipper_conn = create_kubernetes_connection( cleanup=False, @@ -159,7 +159,7 @@ def test_kubernetes(clipper_conn, num_apps, num_models): clipper_conn.stop_all() # Test with proxy. Assumes proxy is running at 127.0.0.1:8080 - proxy_name = "cluster-{}".format(random.randint(0, 5000)) + proxy_name = "k8s-proxy-test-cluster-{}".format(random.randint(0, 5000)) clipper_conn = create_kubernetes_connection( cleanup=True, start_clipper=True, diff --git a/integration-tests/kubernetes_multi_frontend.py b/integration-tests/kubernetes_multi_frontend.py index 4b9d0829b..86f5af42d 100644 --- a/integration-tests/kubernetes_multi_frontend.py +++ b/integration-tests/kubernetes_multi_frontend.py @@ -101,7 +101,7 @@ def create_and_test_app(clipper_conn, name): if __name__ == "__main__": import random - cluster_name = "cluster-{}".format(random.randint(0, 5000)) + cluster_name = "kube-multi-frontend-cluster-{}".format(random.randint(0, 5000)) try: clipper_conn = create_kubernetes_connection( cleanup=False, diff --git a/integration-tests/many_apps_many_models.py b/integration-tests/many_apps_many_models.py index 332806f58..34a5d8780 100644 --- a/integration-tests/many_apps_many_models.py +++ b/integration-tests/many_apps_many_models.py @@ -98,7 +98,7 @@ def create_and_test_app(clipper_conn, name, num_models): num_models = 8 import random - cluster_name = "cluster-{}".format(random.randint(0, 5000)) + cluster_name = "many-app-many-models-cluster-{}".format(random.randint(0, 5000)) try: if len(sys.argv) > 1: diff --git a/integration-tests/multi_tenency_test.py b/integration-tests/multi_tenency_test.py index 23fb10cad..d523484d0 100644 --- a/integration-tests/multi_tenency_test.py +++ b/integration-tests/multi_tenency_test.py @@ -12,8 +12,8 @@ def test(kubernetes): - conn_1 = create('cluster-1', use_kubernetes=kubernetes) - conn_2 = create('cluster-2', use_kubernetes=kubernetes) + conn_1 = create('multi-tenancy-test-cluster-1', use_kubernetes=kubernetes) + conn_2 = create('multi-tenancy-test-cluster-2', use_kubernetes=kubernetes) deploy_(conn_1, use_kubernetes=kubernetes) deploy_(conn_2, use_kubernetes=kubernetes) From 0548997b71a2efccb5b71be3bfb8845e11672991 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Fri, 1 Jun 2018 16:42:22 -0700 Subject: [PATCH 27/63] Format code --- clipper_admin/clipper_admin/clipper_admin.py | 22 ++++++++++------ .../clipper_admin/container_manager.py | 2 +- .../docker/docker_container_manager.py | 8 +++--- .../docker/docker_metric_utils.py | 2 +- .../kubernetes_container_manager.py | 12 +++++---- integration-tests/clipper_admin_tests.py | 25 +++++++++++-------- integration-tests/clipper_metric_docker.py | 5 ++-- .../deploy_pyspark_pipeline_models.py | 3 ++- .../kubernetes_integration_test.py | 6 +++-- .../kubernetes_multi_frontend.py | 19 ++++++++------ integration-tests/many_apps_many_models.py | 3 ++- integration-tests/test_utils.py | 10 +++++--- 12 files changed, 72 insertions(+), 45 deletions(-) diff --git a/clipper_admin/clipper_admin/clipper_admin.py b/clipper_admin/clipper_admin/clipper_admin.py index a2309c355..49d353f84 100644 --- a/clipper_admin/clipper_admin/clipper_admin.py +++ b/clipper_admin/clipper_admin/clipper_admin.py @@ -79,7 +79,9 @@ def __init__(self, container_manager): self.connected = False self.cm = container_manager - self.logger = ClusterAdapter(logger, {'cluster_name': self.cm.cluster_name}) + self.logger = ClusterAdapter(logger, { + 'cluster_name': self.cm.cluster_name + }) def start_clipper( self, @@ -136,8 +138,9 @@ def connect(self): self.cm.connect() self.connected = True - self.logger.info("Successfully connected to Clipper cluster at {}".format( - self.cm.get_query_addr())) + self.logger.info( + "Successfully connected to Clipper cluster at {}".format( + self.cm.get_query_addr())) def register_application(self, name, input_type, default_output, slo_micros): @@ -199,8 +202,9 @@ def register_application(self, name, input_type, default_output, self.logger.error(msg) raise ClipperException(msg) else: - self.logger.info("Application {app} was successfully registered".format( - app=name)) + self.logger.info( + "Application {app} was successfully registered".format( + app=name)) def delete_application(self, name): if not self.connected: @@ -455,7 +459,7 @@ def build_model(self, image_result, build_logs = docker_client.images.build( fileobj=context_file, custom_context=True, tag=image) for b in build_logs: - if 'stream' in b and b['stream'] != '\n': #log build steps only + if 'stream' in b and b['stream'] != '\n': #log build steps only self.logger.info(b['stream'].rstrip()) self.logger.info("Pushing model Docker image to {}".format(image)) @@ -1261,7 +1265,8 @@ def stop_all(self, graceful=True): will take not effect in Kubernetes. """ self.cm.stop_all(graceful=graceful) - self.logger.info("Stopped all Clipper cluster and all model containers") + self.logger.info( + "Stopped all Clipper cluster and all model containers") def test_predict_function(self, query, func, input_type): """Tests that the user's function has the correct signature and can be properly saved and @@ -1336,7 +1341,8 @@ def test_predict_function(self, query, func, input_type): try: assert reloaded_func except AssertionError: - self.logger.error("Function does not properly serialize and reload") + self.logger.error( + "Function does not properly serialize and reload") return "Function does not properly serialize and reload" return reloaded_func(numpy_data) diff --git a/clipper_admin/clipper_admin/container_manager.py b/clipper_admin/clipper_admin/container_manager.py index 729859d2c..f92f02f26 100644 --- a/clipper_admin/clipper_admin/container_manager.py +++ b/clipper_admin/clipper_admin/container_manager.py @@ -44,11 +44,11 @@ class ClusterAdapter(logging.LoggerAdapter): In ContainerManager init process, do: self.logger = ClusterAdapter(logger, {'cluster_name': self.cluster_name}) """ + def process(self, msg, kwargs): return "[{}] {}".format(self.extra['cluster_name'], msg), kwargs - def create_model_container_label(name, version): return "{name}{delim}{version}".format( name=name, delim=_MODEL_CONTAINER_LABEL_DELIMITER, version=version) diff --git a/clipper_admin/clipper_admin/docker/docker_container_manager.py b/clipper_admin/clipper_admin/docker/docker_container_manager.py index 882d2e44c..369749db0 100644 --- a/clipper_admin/clipper_admin/docker/docker_container_manager.py +++ b/clipper_admin/clipper_admin/docker/docker_container_manager.py @@ -14,14 +14,14 @@ CLIPPER_MGMT_FRONTEND_CONTAINER_LABEL, CLIPPER_INTERNAL_RPC_PORT, CLIPPER_INTERNAL_QUERY_PORT, CLIPPER_INTERNAL_MANAGEMENT_PORT, CLIPPER_INTERNAL_METRIC_PORT, CLIPPER_INTERNAL_REDIS_PORT, - CLIPPER_DOCKER_PORT_LABELS, CLIPPER_METRIC_CONFIG_LABEL, - ClusterAdapter) + CLIPPER_DOCKER_PORT_LABELS, CLIPPER_METRIC_CONFIG_LABEL, ClusterAdapter) from ..exceptions import ClipperException from requests.exceptions import ConnectionError from .docker_metric_utils import * logger = logging.getLogger(__name__) + class DockerContainerManager(ContainerManager): def __init__(self, cluster_name="default-cluster", @@ -102,7 +102,9 @@ def __init__(self, self.extra_container_kwargs.update(container_args) - self.logger = ClusterAdapter(logger, {'cluster_name': self.cluster_name}) + self.logger = ClusterAdapter(logger, { + 'cluster_name': self.cluster_name + }) def start_clipper(self, query_frontend_image, diff --git a/clipper_admin/clipper_admin/docker/docker_metric_utils.py b/clipper_admin/clipper_admin/docker/docker_metric_utils.py index 47c2cde4d..c286599aa 100644 --- a/clipper_admin/clipper_admin/docker/docker_metric_utils.py +++ b/clipper_admin/clipper_admin/docker/docker_metric_utils.py @@ -102,7 +102,7 @@ def run_metric_image(docker_client, common_labels, prometheus_port, 'mode': 'ro' } }, - user='root', # prom use nobody by default but it can't access config. + user='root', # prom use nobody by default but it can't access config. labels=metric_labels, **extra_container_kwargs) diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index bf541b450..5b3a8afa3 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -3,8 +3,7 @@ create_model_container_label, ContainerManager, CLIPPER_DOCKER_LABEL, CLIPPER_MODEL_CONTAINER_LABEL, CLIPPER_QUERY_FRONTEND_ID_LABEL, CLIPPER_INTERNAL_MANAGEMENT_PORT, CLIPPER_INTERNAL_QUERY_PORT, - CLIPPER_INTERNAL_METRIC_PORT, CLIPPER_NAME_LABEL, - ClusterAdapter) + CLIPPER_INTERNAL_METRIC_PORT, CLIPPER_NAME_LABEL, ClusterAdapter) from ..exceptions import ClipperException from .kubernetes_metric_utils import PROM_VERSION, CLIPPER_FRONTEND_EXPORTER_IMAGE @@ -128,7 +127,9 @@ def __init__(self, loader=jinja2.FileSystemLoader(cur_dir, followlinks=True), undefined=jinja2.StrictUndefined) - self.logger = ClusterAdapter(logger, {'cluster_name': self.cluster_name}) + self.logger = ClusterAdapter(logger, { + 'cluster_name': self.cluster_name + }) def start_clipper(self, query_frontend_image, @@ -312,8 +313,9 @@ def connect(self): for p in metrics_ports: if p.name == "9090": self.clipper_metric_port = p.node_port - self.logger.info("Setting Clipper metric port to {}".format( - self.clipper_metric_port)) + self.logger.info( + "Setting Clipper metric port to {}".format( + self.clipper_metric_port)) except ApiException as e: logging.warn( diff --git a/integration-tests/clipper_admin_tests.py b/integration-tests/clipper_admin_tests.py index 099bc3596..ef3d1c1c5 100644 --- a/integration-tests/clipper_admin_tests.py +++ b/integration-tests/clipper_admin_tests.py @@ -380,9 +380,10 @@ def predict_func(inputs): "ancestor": "clipper/python-closure-container:{}".format( clipper_version), - "label" : "{key}={val}".format( - key=CLIPPER_DOCKER_LABEL, - val=self.clipper_conn.cm.cluster_name) + "label": + "{key}={val}".format( + key=CLIPPER_DOCKER_LABEL, + val=self.clipper_conn.cm.cluster_name) }) elif py_minor_version == (3, 5): @@ -391,10 +392,10 @@ def predict_func(inputs): "ancestor": "clipper/python35-closure-container:{}".format( clipper_version), - "label" : "{key}={val}".format( - key=CLIPPER_DOCKER_LABEL, - val=self.clipper_conn.cm.cluster_name) - + "label": + "{key}={val}".format( + key=CLIPPER_DOCKER_LABEL, + val=self.clipper_conn.cm.cluster_name) }) elif py_minor_version == (3, 6): containers = docker_client.containers.list( @@ -402,9 +403,10 @@ def predict_func(inputs): "ancestor": "clipper/python36-closure-container:{}".format( clipper_version), - "label" : "{key}={val}".format( - key=CLIPPER_DOCKER_LABEL, - val=self.clipper_conn.cm.cluster_name) + "label": + "{key}={val}".format( + key=CLIPPER_DOCKER_LABEL, + val=self.clipper_conn.cm.cluster_name) }) else: msg = ( @@ -579,7 +581,8 @@ def test_build_model_with_custom_packages(self): class ClipperManagerTestCaseLong(unittest.TestCase): - cluster_name = "admin-test-long-cluster-{}".format(random.randint(0,50000)) + cluster_name = "admin-test-long-cluster-{}".format( + random.randint(0, 50000)) @classmethod def setUpClass(self): diff --git a/integration-tests/clipper_metric_docker.py b/integration-tests/clipper_metric_docker.py index 8a3908a75..2a27b4353 100644 --- a/integration-tests/clipper_metric_docker.py +++ b/integration-tests/clipper_metric_docker.py @@ -87,8 +87,9 @@ def log_docker_ps(clipper_conn): logger.info("Start Metric Test (0/1): Running 2 Replicas") - cluster_name = "metric-docker-cluster-{}".format(random.randint(0,50000)) - clipper_conn = create_docker_connection(cleanup=False, start_clipper=True, new_name=cluster_name) + cluster_name = "metric-docker-cluster-{}".format(random.randint(0, 50000)) + clipper_conn = create_docker_connection( + cleanup=False, start_clipper=True, new_name=cluster_name) python_deployer.create_endpoint( clipper_conn, "simple-example", "doubles", feature_sum, num_replicas=2) time.sleep(2) diff --git a/integration-tests/deploy_pyspark_pipeline_models.py b/integration-tests/deploy_pyspark_pipeline_models.py index 4ce0fe59a..9edb7d8c5 100644 --- a/integration-tests/deploy_pyspark_pipeline_models.py +++ b/integration-tests/deploy_pyspark_pipeline_models.py @@ -56,7 +56,8 @@ def predict(spark, pipeline, xs): def run_test(): import random - cluster_name = "pyspark-pipeline-cluster-{}".format(random.randint(0, 5000)) + cluster_name = "pyspark-pipeline-cluster-{}".format( + random.randint(0, 5000)) spark = SparkSession\ .builder\ diff --git a/integration-tests/kubernetes_integration_test.py b/integration-tests/kubernetes_integration_test.py index 0f5164ee4..dfa906783 100644 --- a/integration-tests/kubernetes_integration_test.py +++ b/integration-tests/kubernetes_integration_test.py @@ -9,7 +9,8 @@ import time import logging from test_utils import (create_kubernetes_connection, BenchmarkException, - fake_model_data, headers, log_clipper_state, CLIPPER_CONTAINER_REGISTRY) + fake_model_data, headers, log_clipper_state, + CLIPPER_CONTAINER_REGISTRY) cur_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.abspath("%s/../clipper_admin" % cur_dir)) from clipper_admin import __version__ as clipper_version, CLIPPER_TEMP_DIR, ClipperException @@ -159,7 +160,8 @@ def test_kubernetes(clipper_conn, num_apps, num_models): clipper_conn.stop_all() # Test with proxy. Assumes proxy is running at 127.0.0.1:8080 - proxy_name = "k8s-proxy-test-cluster-{}".format(random.randint(0, 5000)) + proxy_name = "k8s-proxy-test-cluster-{}".format( + random.randint(0, 5000)) clipper_conn = create_kubernetes_connection( cleanup=True, start_clipper=True, diff --git a/integration-tests/kubernetes_multi_frontend.py b/integration-tests/kubernetes_multi_frontend.py index 86f5af42d..9acc2134e 100644 --- a/integration-tests/kubernetes_multi_frontend.py +++ b/integration-tests/kubernetes_multi_frontend.py @@ -14,7 +14,8 @@ import logging import yaml from test_utils import (create_kubernetes_connection, BenchmarkException, - fake_model_data, headers, log_clipper_state, CLIPPER_CONTAINER_REGISTRY) + fake_model_data, headers, log_clipper_state, + CLIPPER_CONTAINER_REGISTRY) cur_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.abspath("%s/../clipper_admin" % cur_dir)) @@ -101,7 +102,8 @@ def create_and_test_app(clipper_conn, name): if __name__ == "__main__": import random - cluster_name = "kube-multi-frontend-cluster-{}".format(random.randint(0, 5000)) + cluster_name = "kube-multi-frontend-cluster-{}".format( + random.randint(0, 5000)) try: clipper_conn = create_kubernetes_connection( cleanup=False, @@ -117,8 +119,9 @@ def create_and_test_app(clipper_conn, name): k8s_beta = clipper_conn.cm._k8s_beta if (k8s_beta.read_namespaced_deployment( - 'query-frontend-0-at-{}'.format(cluster_name), namespace='default').to_dict() - ['status']['available_replicas'] != 1): + 'query-frontend-0-at-{}'.format(cluster_name), + namespace='default').to_dict()['status'] + ['available_replicas'] != 1): raise BenchmarkException( "Wrong number of replicas of query-frontend-0." "Expected {}, found {}".format( @@ -127,8 +130,9 @@ def create_and_test_app(clipper_conn, name): 'query-frontend-0', namespace='default').to_dict()[ 'status']['available_replicas'])) if (k8s_beta.read_namespaced_deployment( - 'query-frontend-1-at-{}'.format(cluster_name), namespace='default').to_dict() - ['status']['available_replicas'] != 1): + 'query-frontend-1-at-{}'.format(cluster_name), + namespace='default').to_dict()['status'] + ['available_replicas'] != 1): raise BenchmarkException( "Wrong number of replicas of query-frontend-1." "Expected {}, found {}".format( @@ -143,7 +147,8 @@ def create_and_test_app(clipper_conn, name): namespace='default').to_dict()['items'] svc_names = [svc['metadata']['name'] for svc in svc_lists] if not ('query-frontend-0-at-{}'.format(cluster_name) in svc_names - and 'query-frontend-1-at-{}'.format(cluster_name) in svc_names): + and 'query-frontend-1-at-{}'.format( + cluster_name) in svc_names): raise BenchmarkException( "Error creating query frontend RPC services") logger.info("Ok: we have 2 query-frontend rpc services") diff --git a/integration-tests/many_apps_many_models.py b/integration-tests/many_apps_many_models.py index 34a5d8780..d5a043c69 100644 --- a/integration-tests/many_apps_many_models.py +++ b/integration-tests/many_apps_many_models.py @@ -98,7 +98,8 @@ def create_and_test_app(clipper_conn, name, num_models): num_models = 8 import random - cluster_name = "many-app-many-models-cluster-{}".format(random.randint(0, 5000)) + cluster_name = "many-app-many-models-cluster-{}".format( + random.randint(0, 5000)) try: if len(sys.argv) > 1: diff --git a/integration-tests/test_utils.py b/integration-tests/test_utils.py index 6829649a5..4ef4cc41a 100644 --- a/integration-tests/test_utils.py +++ b/integration-tests/test_utils.py @@ -84,7 +84,12 @@ def create_docker_connection(cleanup=True, cl = ClipperConnection(cm) cl.stop_all(graceful=False) docker_client = get_docker_client() - docker_client.containers.prune(filters={"label": "{key}={val}".format(key=CLIPPER_DOCKER_LABEL, val=cleanup_name)}) + docker_client.containers.prune( + filters={ + "label": + "{key}={val}".format( + key=CLIPPER_DOCKER_LABEL, val=cleanup_name) + }) if start_clipper: # Try to start Clipper in a retry loop here to address flaky tests @@ -140,8 +145,7 @@ def create_kubernetes_connection(cleanup=False, if start_clipper: logger.info("Starting up Kubernetes Cluster {}".format(new_name)) cm = KubernetesContainerManager( - cluster_name=new_name, - kubernetes_proxy_addr=kubernetes_proxy_addr) + cluster_name=new_name, kubernetes_proxy_addr=kubernetes_proxy_addr) cl = ClipperConnection(cm) cl.start_clipper( query_frontend_image= From ebb5354ab8c0db0f58cf7041312a619313415aed Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 5 Jun 2018 14:22:36 -0700 Subject: [PATCH 28/63] Remove temp_dir usage in deployer serialize func --- clipper_admin/clipper_admin/deployers/deployer_utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/clipper_admin/clipper_admin/deployers/deployer_utils.py b/clipper_admin/clipper_admin/deployers/deployer_utils.py index 17b13778f..79bfb969e 100644 --- a/clipper_admin/clipper_admin/deployers/deployer_utils.py +++ b/clipper_admin/clipper_admin/deployers/deployer_utils.py @@ -2,7 +2,6 @@ import logging from cloudpickle import CloudPickler -from ..clipper_admin import CLIPPER_TEMP_DIR import os import tempfile import sys @@ -38,9 +37,7 @@ def save_python_function(name, func): serialized_prediction_function = s.getvalue() # Set up serialization directory - if not os.path.exists(CLIPPER_TEMP_DIR): - os.makedirs(CLIPPER_TEMP_DIR) - serialization_dir = tempfile.mkdtemp(dir=CLIPPER_TEMP_DIR) + serialization_dir = os.path.abspath(tempfile.mkdtemp(suffix='clipper')) logger.info("Saving function to {}".format(serialization_dir)) # Write out function serialization From 396ed5f1c07af4192ff215b6402421b6f71156c6 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 5 Jun 2018 15:34:32 -0700 Subject: [PATCH 29/63] Address comment --- bin/run_ci.sh | 6 ++ bin/run_unittests.sh | 4 +- .../clipper_admin/container_manager.py | 48 +------------- .../docker/docker_container_manager.py | 59 +++++++++++++++-- .../kubernetes_container_manager.py | 19 ++++-- integration-tests/kubernetes_namespace.py | 66 +++++++++++++++++++ ..._tenency_test.py => multi_tenancy_test.py} | 0 integration-tests/test_utils.py | 5 +- 8 files changed, 143 insertions(+), 64 deletions(-) create mode 100644 integration-tests/kubernetes_namespace.py rename integration-tests/{multi_tenency_test.py => multi_tenancy_test.py} (100%) diff --git a/bin/run_ci.sh b/bin/run_ci.sh index 6af56f434..4c0206845 100755 --- a/bin/run_ci.sh +++ b/bin/run_ci.sh @@ -7,6 +7,12 @@ set -o pipefail # Printout for timeout debug date +echo "DON'T PANIC--------------------------------" +echo "Jenkins don't test this" +exit 1 + + + unset CDPATH # one-liner from http://stackoverflow.com/a/246128 # Determines absolute path of the directory containing diff --git a/bin/run_unittests.sh b/bin/run_unittests.sh index 001b9b0fb..f5742e108 100755 --- a/bin/run_unittests.sh +++ b/bin/run_unittests.sh @@ -148,8 +148,8 @@ function run_integration_tests { ../integration-tests/r_integration_test/rclipper_test.sh & python ../integration-tests/clipper_metric_docker.py & python ../integration-tests/clipper_metric_kube.py & - python ../integration-tests/multi_tenency_test.py & - python ../integration-tests/multi_tenency_test.py --kubernetes & + python ../integration-tests/multi_tenancy_test.py & + python ../integration-tests/multi_tenancy_test.py --kubernetes & wait echo "GREPTHIS Docker State After" diff --git a/clipper_admin/clipper_admin/container_manager.py b/clipper_admin/clipper_admin/container_manager.py index f92f02f26..6ef5d27a6 100644 --- a/clipper_admin/clipper_admin/container_manager.py +++ b/clipper_admin/clipper_admin/container_manager.py @@ -1,7 +1,5 @@ import abc from .exceptions import ClipperException -import random -import socket import logging # Constants @@ -21,7 +19,7 @@ CLIPPER_DOCKER_PORT_LABELS = { 'redis': 'ai.clipper.redis.port', - 'query_query': 'ai.clipper.query_frontend.query.port', + 'query_rest': 'ai.clipper.query_frontend.query.port', 'query_rpc': 'ai.clipper.query_frontend.rpc.port', 'management': 'ai.clipper.management.port', 'metric': 'ai.clipper.metric.port' @@ -62,50 +60,6 @@ def parse_model_container_label(label): return splits -def find_unbound_port(start=None, - increment=True, - port_range=(34256, 50000), - verbose=False, - logger=None): - """ - Fina a unbound port. - - Parameters - ---------- - start : int - The port number to start with. If this port is unbounded, return this port. - If None, start will be a random port. - increment : bool - If True, find port by incrementing start port; else, random search. - port_range : tuple - The range of port for random number generation - verbose : bool - Verbose flag for logging - logger: logging.Logger - """ - while True: - if not start: - start = random.randint(*port_range) - - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - try: - sock.bind(("127.0.0.1", start)) - # Make sure we clean up after binding - del sock - return start - except socket.error as e: - if verbose and logger: - logger.info("Socket error: {}".format(e)) - logger.info( - "randomly generated port %d is bound. Trying again." % - start) - - if increment: - start += 1 - else: - start = random.randint(*port_range) - - class ContainerManager(object): __metaclass__ = abc.ABCMeta diff --git a/clipper_admin/clipper_admin/docker/docker_container_manager.py b/clipper_admin/clipper_admin/docker/docker_container_manager.py index 369749db0..08b0548b5 100644 --- a/clipper_admin/clipper_admin/docker/docker_container_manager.py +++ b/clipper_admin/clipper_admin/docker/docker_container_manager.py @@ -1,4 +1,7 @@ from __future__ import absolute_import, division, print_function + +import socket + import docker import logging import os @@ -9,7 +12,7 @@ import tempfile from ..container_manager import ( create_model_container_label, parse_model_container_label, - find_unbound_port, ContainerManager, CLIPPER_DOCKER_LABEL, + ContainerManager, CLIPPER_DOCKER_LABEL, CLIPPER_MODEL_CONTAINER_LABEL, CLIPPER_QUERY_FRONTEND_CONTAINER_LABEL, CLIPPER_MGMT_FRONTEND_CONTAINER_LABEL, CLIPPER_INTERNAL_RPC_PORT, CLIPPER_INTERNAL_QUERY_PORT, CLIPPER_INTERNAL_MANAGEMENT_PORT, @@ -132,12 +135,12 @@ def start_clipper(self, containers_in_cluster = self.docker_client.containers.list( filters={ 'label': - ['ai.clipper.container.label={}'.format(self.cluster_name)] + ['{kye}={val}'.format(key=CLIPPER_DOCKER_LABEL, val=self.cluster_name)] }) if len(containers_in_cluster) > 0: raise ClipperException( "Cluster {} cannot be started because it already exists. " - "Please use clipper_conn.connect() to connect to it.".format( + "Please use ClipperConnection.connect() to connect to it.".format( self.cluster_name)) if not self.external_redis: @@ -190,7 +193,7 @@ def start_clipper(self, self.clipper_rpc_port = find_unbound_port(self.clipper_rpc_port) query_labels = self.common_labels.copy() query_labels[CLIPPER_QUERY_FRONTEND_CONTAINER_LABEL] = "" - query_labels[CLIPPER_DOCKER_PORT_LABELS['query_query']] = str( + query_labels[CLIPPER_DOCKER_PORT_LABELS['query_rest']] = str( self.clipper_query_port) query_labels[CLIPPER_DOCKER_PORT_LABELS['query_rpc']] = str( self.clipper_rpc_port) @@ -240,7 +243,7 @@ def connect(self): containers = self.docker_client.containers.list( filters={ 'label': - ['ai.clipper.container.label={}'.format(self.cluster_name)] + ['{key}={val}'.format(key=CLIPPER_DOCKER_LABEL, val=self.cluster_name)] }) all_labels = {} for container in containers: @@ -250,7 +253,7 @@ def connect(self): self.clipper_management_port = all_labels[CLIPPER_DOCKER_PORT_LABELS[ 'management']] self.clipper_query_port = all_labels[CLIPPER_DOCKER_PORT_LABELS[ - 'query_query']] + 'query_rest']] self.clipper_rpc_port = all_labels[CLIPPER_DOCKER_PORT_LABELS[ 'query_rpc']] self.prometheus_port = all_labels[CLIPPER_DOCKER_PORT_LABELS['metric']] @@ -443,3 +446,47 @@ def get_query_addr(self): def get_metric_addr(self): return "{host}:{port}".format( host=self.public_hostname, port=self.prometheus_port) + + +def find_unbound_port(start=None, + increment=True, + port_range=(10000, 50000), + verbose=False, + logger=None): + """ + Find a unbound port. + + Parameters + ---------- + start : int + The port number to start with. If this port is unbounded, return this port. + If None, start will be a random port. + increment : bool + If True, find port by incrementing start port; else, random search. + port_range : tuple + The range of port for random number generation + verbose : bool + Verbose flag for logging + logger: logging.Logger + """ + while True: + if not start: + start = random.randint(*port_range) + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + sock.bind(("127.0.0.1", start)) + # Make sure we clean up after binding + del sock + return start + except socket.error as e: + if verbose and logger: + logger.info("Socket error: {}".format(e)) + logger.info( + "randomly generated port %d is bound. Trying again." % + start) + + if increment: + start += 1 + else: + start = random.randint(*port_range) \ No newline at end of file diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index 724a8b397..1d4b67157 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -131,9 +131,6 @@ def __init__(self, loader=jinja2.FileSystemLoader(cur_dir, followlinks=True), undefined=jinja2.StrictUndefined) - self.logger = ClusterAdapter(logger, { - 'cluster_name': self.cluster_name - }) # Check if namespace exists and if create flag set ...create the namespace or throw error namespaces = [] @@ -159,6 +156,14 @@ def __init__(self, logger.error(msg) raise ClipperException(msg) + cluster_identifier = "{ns}{cluster}".format( + ns=self.k8s_namespace+'/' if self.k8s_namespace != "default" else "", + cluster=self.cluster_name + ) + self.logger = ClusterAdapter(logger, { + 'cluster_name': cluster_identifier + }) + def start_clipper(self, query_frontend_image, mgmt_frontend_image, @@ -196,21 +201,21 @@ def _start_redis(self, sleep_time=5): def _start_mgmt(self, mgmt_image): with _pass_conflicts(): - mgmt_depolyment_data = self._generate_config( + mgmt_deployment_data = self._generate_config( CONFIG_FILES['management']['deployment'], image=mgmt_image, redis_service_host=self.redis_ip, redis_service_port=self.redis_port, cluster_name=self.cluster_name) self._k8s_beta.create_namespaced_deployment( - body=mgmt_depolyment_data, namespace='default') + body=mgmt_deployment_data, namespace=self.k8s_namespace) with _pass_conflicts(): mgmt_service_data = self._generate_config( CONFIG_FILES['management']['service'], cluster_name=self.cluster_name) self._k8s_v1.create_namespaced_service( - body=mgmt_service_data, namespace=self.namespace) + body=mgmt_service_data, namespace=self.k8s_namespace) def _start_query(self, query_image, cache_size, num_replicas): for query_frontend_id in range(num_replicas): @@ -242,7 +247,7 @@ def _start_query(self, query_image, cache_size, num_replicas): CONFIG_FILES['query']['service']['query'], cluster_name=self.cluster_name) self._k8s_v1.create_namespaced_service( - body=query_frontend_service_data, namespace=self.namespace) + body=query_frontend_service_data, namespace=self.k8s_namespace) def _start_prometheus(self): with _pass_conflicts(): diff --git a/integration-tests/kubernetes_namespace.py b/integration-tests/kubernetes_namespace.py new file mode 100644 index 000000000..4a85ff9d6 --- /dev/null +++ b/integration-tests/kubernetes_namespace.py @@ -0,0 +1,66 @@ +""" +Sanity Check for Kubernetes Namespace(s) +""" + +from clipper_admin import ClipperConnection, DockerContainerManager, KubernetesContainerManager +from clipper_admin.deployers import python as python_deployer + +import signal +import sys +import json +import requests +from datetime import datetime +import os +import time +from test_utils import create_kubernetes_connection, create_docker_connection, CLIPPER_CONTAINER_REGISTRY + + +def test(): + conn_1 = create_kubernetes_connection( + cleanup=False, start_clipper=True, namespace='ns-1') + conn_2 = create_kubernetes_connection( + cleanup=False, start_clipper=True, namespace='ns-2') + + deploy_(conn_1) + deploy_(conn_2) + + res_1 = predict_(conn_1.get_query_addr(), [.1, .2, .3]) + res_2 = predict_(conn_2.get_query_addr(), [.1, .2, .3]) + assert not res_1['default'] + assert not res_2['default'] + + conn_1.stop_all() + conn_2.stop_all() + + +def feature_sum(xs): + return [str(sum(x)) for x in xs] + + +def deploy_(clipper_conn): + python_deployer.create_endpoint( + clipper_conn, + "simple-example", + "doubles", + feature_sum, + registry=CLIPPER_CONTAINER_REGISTRY) + +def predict_(addr, x, batch=False): + url = "http://%s/simple-example/predict" % addr + + if batch: + req_json = json.dumps({'input_batch': x}) + else: + req_json = json.dumps({'input': list(x)}) + + headers = {'Content-type': 'application/json'} + start = datetime.now() + r = requests.post(url, headers=headers, data=req_json) + end = datetime.now() + latency = (end - start).total_seconds() * 1000.0 + print("'%s', %f ms" % (r.text, latency)) + return r.json() + + +if __name__ == '__main__': + test() diff --git a/integration-tests/multi_tenency_test.py b/integration-tests/multi_tenancy_test.py similarity index 100% rename from integration-tests/multi_tenency_test.py rename to integration-tests/multi_tenancy_test.py diff --git a/integration-tests/test_utils.py b/integration-tests/test_utils.py index 72054d954..f3c865c5b 100644 --- a/integration-tests/test_utils.py +++ b/integration-tests/test_utils.py @@ -123,7 +123,8 @@ def create_kubernetes_connection(cleanup=False, num_frontend_replicas=1, cleanup_name='default-cluster', new_name='default-cluster', - connect_name='default-cluster'): + connect_name='default-cluster', + namespace='default'): logger.info("Creating KubernetesContainerManager") cl = None assert cleanup or start_clipper or connect, "You must set at least one of {cleanup, start_clipper, connect} to be true." @@ -145,7 +146,7 @@ def create_kubernetes_connection(cleanup=False, if start_clipper: logger.info("Starting up Kubernetes Cluster {}".format(new_name)) cm = KubernetesContainerManager( - cluster_name=new_name, kubernetes_proxy_addr=kubernetes_proxy_addr) + cluster_name=new_name, kubernetes_proxy_addr=kubernetes_proxy_addr, namespace=namespace) cl = ClipperConnection(cm) cl.start_clipper( query_frontend_image= From 09bf075243f7c40f9ffbce370817b9a122a61425 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 5 Jun 2018 15:35:21 -0700 Subject: [PATCH 30/63] Format code --- .../docker/docker_container_manager.py | 22 +++++++++++-------- .../kubernetes_container_manager.py | 7 +++--- integration-tests/kubernetes_namespace.py | 5 +++-- integration-tests/test_utils.py | 4 +++- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/clipper_admin/clipper_admin/docker/docker_container_manager.py b/clipper_admin/clipper_admin/docker/docker_container_manager.py index 08b0548b5..5d9f12c7a 100644 --- a/clipper_admin/clipper_admin/docker/docker_container_manager.py +++ b/clipper_admin/clipper_admin/docker/docker_container_manager.py @@ -12,8 +12,8 @@ import tempfile from ..container_manager import ( create_model_container_label, parse_model_container_label, - ContainerManager, CLIPPER_DOCKER_LABEL, - CLIPPER_MODEL_CONTAINER_LABEL, CLIPPER_QUERY_FRONTEND_CONTAINER_LABEL, + ContainerManager, CLIPPER_DOCKER_LABEL, CLIPPER_MODEL_CONTAINER_LABEL, + CLIPPER_QUERY_FRONTEND_CONTAINER_LABEL, CLIPPER_MGMT_FRONTEND_CONTAINER_LABEL, CLIPPER_INTERNAL_RPC_PORT, CLIPPER_INTERNAL_QUERY_PORT, CLIPPER_INTERNAL_MANAGEMENT_PORT, CLIPPER_INTERNAL_METRIC_PORT, CLIPPER_INTERNAL_REDIS_PORT, @@ -134,14 +134,16 @@ def start_clipper(self, containers_in_cluster = self.docker_client.containers.list( filters={ - 'label': - ['{kye}={val}'.format(key=CLIPPER_DOCKER_LABEL, val=self.cluster_name)] + 'label': [ + '{kye}={val}'.format( + key=CLIPPER_DOCKER_LABEL, val=self.cluster_name) + ] }) if len(containers_in_cluster) > 0: raise ClipperException( "Cluster {} cannot be started because it already exists. " - "Please use ClipperConnection.connect() to connect to it.".format( - self.cluster_name)) + "Please use ClipperConnection.connect() to connect to it.". + format(self.cluster_name)) if not self.external_redis: self.logger.info("Starting managed Redis instance in Docker") @@ -242,8 +244,10 @@ def connect(self): """ containers = self.docker_client.containers.list( filters={ - 'label': - ['{key}={val}'.format(key=CLIPPER_DOCKER_LABEL, val=self.cluster_name)] + 'label': [ + '{key}={val}'.format( + key=CLIPPER_DOCKER_LABEL, val=self.cluster_name) + ] }) all_labels = {} for container in containers: @@ -489,4 +493,4 @@ def find_unbound_port(start=None, if increment: start += 1 else: - start = random.randint(*port_range) \ No newline at end of file + start = random.randint(*port_range) diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index 1d4b67157..8b867d25e 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -131,7 +131,6 @@ def __init__(self, loader=jinja2.FileSystemLoader(cur_dir, followlinks=True), undefined=jinja2.StrictUndefined) - # Check if namespace exists and if create flag set ...create the namespace or throw error namespaces = [] for ns in self._k8s_v1.list_namespace().items: @@ -157,9 +156,9 @@ def __init__(self, raise ClipperException(msg) cluster_identifier = "{ns}{cluster}".format( - ns=self.k8s_namespace+'/' if self.k8s_namespace != "default" else "", - cluster=self.cluster_name - ) + ns=self.k8s_namespace + '/' + if self.k8s_namespace != "default" else "", + cluster=self.cluster_name) self.logger = ClusterAdapter(logger, { 'cluster_name': cluster_identifier }) diff --git a/integration-tests/kubernetes_namespace.py b/integration-tests/kubernetes_namespace.py index 4a85ff9d6..c648dac62 100644 --- a/integration-tests/kubernetes_namespace.py +++ b/integration-tests/kubernetes_namespace.py @@ -17,9 +17,9 @@ def test(): conn_1 = create_kubernetes_connection( - cleanup=False, start_clipper=True, namespace='ns-1') + cleanup=False, start_clipper=True, namespace='ns-1') conn_2 = create_kubernetes_connection( - cleanup=False, start_clipper=True, namespace='ns-2') + cleanup=False, start_clipper=True, namespace='ns-2') deploy_(conn_1) deploy_(conn_2) @@ -45,6 +45,7 @@ def deploy_(clipper_conn): feature_sum, registry=CLIPPER_CONTAINER_REGISTRY) + def predict_(addr, x, batch=False): url = "http://%s/simple-example/predict" % addr diff --git a/integration-tests/test_utils.py b/integration-tests/test_utils.py index f3c865c5b..a2a23dbcb 100644 --- a/integration-tests/test_utils.py +++ b/integration-tests/test_utils.py @@ -146,7 +146,9 @@ def create_kubernetes_connection(cleanup=False, if start_clipper: logger.info("Starting up Kubernetes Cluster {}".format(new_name)) cm = KubernetesContainerManager( - cluster_name=new_name, kubernetes_proxy_addr=kubernetes_proxy_addr, namespace=namespace) + cluster_name=new_name, + kubernetes_proxy_addr=kubernetes_proxy_addr, + namespace=namespace) cl = ClipperConnection(cm) cl.start_clipper( query_frontend_image= From 878a00f9693b3db651f5ef232b38c4ce5a9ef00a Mon Sep 17 00:00:00 2001 From: simon-mo Date: Wed, 6 Jun 2018 00:44:37 +0000 Subject: [PATCH 31/63] Add more comments; shorten naming for cluster --- bin/run_ci.sh | 6 ---- bin/run_unittests.sh | 32 +++++++++---------- clipper_admin/clipper_admin/clipper_admin.py | 2 +- .../docker/docker_container_manager.py | 9 ++++-- .../kubernetes_container_manager.py | 15 ++++++--- integration-tests/clipper_admin_tests.py | 5 ++- integration-tests/clipper_metric_docker.py | 5 +-- integration-tests/clipper_metric_kube.py | 2 +- integration-tests/deploy_mxnet_models.py | 2 +- integration-tests/deploy_pyspark_models.py | 2 +- .../deploy_pyspark_pipeline_models.py | 3 +- .../deploy_pyspark_sparkml_models.py | 2 +- integration-tests/deploy_pytorch_models.py | 2 +- .../deploy_pytorch_to_caffe2_with_onnx.py | 2 +- integration-tests/deploy_tensorflow_models.py | 2 +- integration-tests/deploy_xgboost_models.py | 2 +- .../kubernetes_integration_test.py | 2 +- .../kubernetes_multi_frontend.py | 3 +- integration-tests/many_apps_many_models.py | 7 ++-- integration-tests/multi_tenancy_test.py | 4 +-- integration-tests/test_utils.py | 7 ++-- 21 files changed, 58 insertions(+), 58 deletions(-) diff --git a/bin/run_ci.sh b/bin/run_ci.sh index 4c0206845..6af56f434 100755 --- a/bin/run_ci.sh +++ b/bin/run_ci.sh @@ -7,12 +7,6 @@ set -o pipefail # Printout for timeout debug date -echo "DON'T PANIC--------------------------------" -echo "Jenkins don't test this" -exit 1 - - - unset CDPATH # one-liner from http://stackoverflow.com/a/246128 # Determines absolute path of the directory containing diff --git a/bin/run_unittests.sh b/bin/run_unittests.sh index f5742e108..53365e569 100755 --- a/bin/run_unittests.sh +++ b/bin/run_unittests.sh @@ -133,24 +133,24 @@ function run_integration_tests { echo "GREPTHIS Docker State before:" docker ps - python ../integration-tests/clipper_admin_tests.py & - python ../integration-tests/many_apps_many_models.py 2 3 & - python ../integration-tests/deploy_pyspark_models.py & - python ../integration-tests/deploy_pyspark_pipeline_models.py & - python ../integration-tests/deploy_pyspark_sparkml_models.py & - python ../integration-tests/kubernetes_integration_test.py & - python ../integration-tests/kubernetes_multi_frontend.py & - python ../integration-tests/deploy_tensorflow_models.py & - python ../integration-tests/deploy_mxnet_models.py & - python ../integration-tests/deploy_pytorch_models.py & + python ../integration-tests/clipper_admin_tests.py + python ../integration-tests/many_apps_many_models.py 2 3 + python ../integration-tests/deploy_pyspark_models.py + python ../integration-tests/deploy_pyspark_pipeline_models.py + python ../integration-tests/deploy_pyspark_sparkml_models.py + python ../integration-tests/kubernetes_integration_test.py + python ../integration-tests/kubernetes_multi_frontend.py + python ../integration-tests/deploy_tensorflow_models.py + python ../integration-tests/deploy_mxnet_models.py + python ../integration-tests/deploy_pytorch_models.py # See issue #475 # python ../integration-tests/deploy_pytorch_to_caffe2_with_onnx.py - ../integration-tests/r_integration_test/rclipper_test.sh & - python ../integration-tests/clipper_metric_docker.py & - python ../integration-tests/clipper_metric_kube.py & - python ../integration-tests/multi_tenancy_test.py & - python ../integration-tests/multi_tenancy_test.py --kubernetes & - wait + ../integration-tests/r_integration_test/rclipper_test.sh + python ../integration-tests/clipper_metric_docker.py + python ../integration-tests/clipper_metric_kube.py + python ../integration-tests/multi_tenancy_test.py + python ../integration-tests/multi_tenancy_test.py --kubernetes + python ../integration-tests/kubernetes_namespace.py echo "GREPTHIS Docker State After" docker ps diff --git a/clipper_admin/clipper_admin/clipper_admin.py b/clipper_admin/clipper_admin/clipper_admin.py index 49d353f84..307fdfe48 100644 --- a/clipper_admin/clipper_admin/clipper_admin.py +++ b/clipper_admin/clipper_admin/clipper_admin.py @@ -80,7 +80,7 @@ def __init__(self, container_manager): self.cm = container_manager self.logger = ClusterAdapter(logger, { - 'cluster_name': self.cm.cluster_name + 'cluster_name': self.cm.cluster_identifier }) def start_clipper( diff --git a/clipper_admin/clipper_admin/docker/docker_container_manager.py b/clipper_admin/clipper_admin/docker/docker_container_manager.py index 5d9f12c7a..4b7e93d39 100644 --- a/clipper_admin/clipper_admin/docker/docker_container_manager.py +++ b/clipper_admin/clipper_admin/docker/docker_container_manager.py @@ -68,6 +68,7 @@ def __init__(self, :py:meth:`docker.client.containers.run`. """ self.cluster_name = cluster_name + self.cluster_identifier = cluster_name # For logging purpose self.public_hostname = docker_ip_address self.clipper_query_port = clipper_query_port self.clipper_management_port = clipper_management_port @@ -106,7 +107,7 @@ def __init__(self, self.extra_container_kwargs.update(container_args) self.logger = ClusterAdapter(logger, { - 'cluster_name': self.cluster_name + 'cluster_name': self.cluster_identifier }) def start_clipper(self, @@ -135,7 +136,7 @@ def start_clipper(self, containers_in_cluster = self.docker_client.containers.list( filters={ 'label': [ - '{kye}={val}'.format( + '{key}={val}'.format( key=CLIPPER_DOCKER_LABEL, val=self.cluster_name) ] }) @@ -222,6 +223,8 @@ def start_clipper(self, 'w', suffix='.yml', delete=False).name self.prom_config_path = os.path.realpath( self.prom_config_path) # resolve symlink + self.logger.info("Metric Configuration Saved at {path}".format( + path=self.prom_config_path)) setup_metric_config(query_frontend_metric_name, self.prom_config_path, CLIPPER_INTERNAL_METRIC_PORT) @@ -453,7 +456,7 @@ def get_metric_addr(self): def find_unbound_port(start=None, - increment=True, + increment=False, port_range=(10000, 50000), verbose=False, logger=None): diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index 8b867d25e..6a125dc55 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -76,6 +76,11 @@ def __init__(self, cluster_name : str A unique name for this Clipper cluster. This can be used to run multiple Clipper clusters on the same Kubernetes cluster without interfering with each other. + Kubernetes cluster name must follow Kubernetes label value naming rule, namely: + Valid label values must be 63 characters or less and must be empty or begin and end with + an alphanumeric character ([a-z0-9A-Z]) with dashes (-), underscores (_), dots (.), + and alphanumerics between. See more at: + https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set kubernetes_proxy_addr : str, optional The proxy address if you are proxying connections locally using ``kubectl proxy``. If this argument is provided, Clipper will construct the appropriate proxy @@ -151,16 +156,16 @@ def __init__(self, "Reason: {}".format(e.reason)) self.k8s_namespace = namespace else: - msg = "Error connecting to Kubernetes cluster. Namespace does not exist" + msg = "Error connecting to Kubernetes cluster. Namespace does not exist. You can pass in KubernetesContainerManager(create_namespace_if_not_exists=True) to crate this namespcae" logger.error(msg) raise ClipperException(msg) - cluster_identifier = "{ns}{cluster}".format( + self.cluster_identifier = "{ns}{cluster}".format( ns=self.k8s_namespace + '/' if self.k8s_namespace != "default" else "", cluster=self.cluster_name) self.logger = ClusterAdapter(logger, { - 'cluster_name': cluster_identifier + 'cluster_name': self.cluster_identifier }) def start_clipper(self, @@ -296,7 +301,7 @@ def connect(self): external_node_hosts.append(addr.address) if len(external_node_hosts) == 0: - msg = "Error connecting to Kubernetes cluster. No external node addresses found" + msg = "Error connecting to Kubernetes cluster. No external node addresses found. If you are running Kubernetes locally, you can pass in KubernetesContainerManager(useInternalIP=True) to connect to local Kubernetes cluster" self.logger.error(msg) raise ClipperException(msg) @@ -309,7 +314,7 @@ def connect(self): mgmt_frontend_ports = self._k8s_v1.read_namespaced_service( name="mgmt-frontend-at-{cluster_name}".format( cluster_name=self.cluster_name), - namespace=sekf.k8s_namespace).spec.ports + namespace=self.k8s_namespace).spec.ports for p in mgmt_frontend_ports: if p.name == "1338": self.clipper_management_port = p.node_port diff --git a/integration-tests/clipper_admin_tests.py b/integration-tests/clipper_admin_tests.py index ef3d1c1c5..dc4be7646 100644 --- a/integration-tests/clipper_admin_tests.py +++ b/integration-tests/clipper_admin_tests.py @@ -581,8 +581,7 @@ def test_build_model_with_custom_packages(self): class ClipperManagerTestCaseLong(unittest.TestCase): - cluster_name = "admin-test-long-cluster-{}".format( - random.randint(0, 50000)) + cluster_name = "admin-l-{}".format(random.randint(0, 50000)) @classmethod def setUpClass(self): @@ -915,5 +914,5 @@ def test_remove_inactive_container(self): for test in LONG_TEST_ORDERING: suite.addTest(ClipperManagerTestCaseLong(test)) - result = unittest.TextTestRunner(verbosity=2).run(suite) + result = unittest.TextTestRunner(verbosity=2, failfast=True).run(suite) sys.exit(not result.wasSuccessful()) diff --git a/integration-tests/clipper_metric_docker.py b/integration-tests/clipper_metric_docker.py index 2a27b4353..d72e9e8ef 100644 --- a/integration-tests/clipper_metric_docker.py +++ b/integration-tests/clipper_metric_docker.py @@ -87,7 +87,7 @@ def log_docker_ps(clipper_conn): logger.info("Start Metric Test (0/1): Running 2 Replicas") - cluster_name = "metric-docker-cluster-{}".format(random.randint(0, 50000)) + cluster_name = "metric-d-{}".format(random.randint(0, 50000)) clipper_conn = create_docker_connection( cleanup=False, start_clipper=True, new_name=cluster_name) python_deployer.create_endpoint( @@ -119,7 +119,8 @@ def log_docker_ps(clipper_conn): logger.info("Test 2 Passed") logger.info("Metric Test Done, Cleaning up...") - create_docker_connection(cleanup=True, cleanup_name=cluster_name) + create_docker_connection( + cleanup=True, start_clipper=False, cleanup_name=cluster_name) except Exception as e: log_docker_ps(clipper_conn) logger.error(e) diff --git a/integration-tests/clipper_metric_kube.py b/integration-tests/clipper_metric_kube.py index 3d956ee9d..8c0403cfd 100644 --- a/integration-tests/clipper_metric_kube.py +++ b/integration-tests/clipper_metric_kube.py @@ -138,7 +138,7 @@ def check_target_health(metric_addr): if __name__ == "__main__": import random - cluster_name = 'metric-k8s-cluster-{}'.format(random.randint(0, 5000)) + cluster_name = 'metric-k8s-{}'.format(random.randint(0, 5000)) try: clipper_conn = create_kubernetes_connection( new_name=cluster_name, cleanup=False, start_clipper=True) diff --git a/integration-tests/deploy_mxnet_models.py b/integration-tests/deploy_mxnet_models.py index 097eb3f89..872e9915f 100644 --- a/integration-tests/deploy_mxnet_models.py +++ b/integration-tests/deploy_mxnet_models.py @@ -97,7 +97,7 @@ def get_test_point(): pos_label = 3 import random - cluster_name = "mxnet-cluster-{}".format(random.randint(0, 5000)) + cluster_name = "mxnet-{}".format(random.randint(0, 5000)) try: clipper_conn = create_docker_connection( new_name=cluster_name, cleanup=False, start_clipper=True) diff --git a/integration-tests/deploy_pyspark_models.py b/integration-tests/deploy_pyspark_models.py index 1368af881..0518634b1 100644 --- a/integration-tests/deploy_pyspark_models.py +++ b/integration-tests/deploy_pyspark_models.py @@ -120,7 +120,7 @@ def get_test_point(): pos_label = 3 import random - cluster_name = "pyspark-cluster-{}".format(random.randint(0, 5000)) + cluster_name = "spark-{}".format(random.randint(0, 5000)) try: spark = SparkSession\ diff --git a/integration-tests/deploy_pyspark_pipeline_models.py b/integration-tests/deploy_pyspark_pipeline_models.py index 08ece0e23..f84dd5c5b 100644 --- a/integration-tests/deploy_pyspark_pipeline_models.py +++ b/integration-tests/deploy_pyspark_pipeline_models.py @@ -57,8 +57,7 @@ def predict(spark, pipeline, xs): def run_test(): import random - cluster_name = "pyspark-pipeline-cluster-{}".format( - random.randint(0, 5000)) + cluster_name = "spark-pipe-{}".format(random.randint(0, 5000)) spark = SparkSession\ .builder\ diff --git a/integration-tests/deploy_pyspark_sparkml_models.py b/integration-tests/deploy_pyspark_sparkml_models.py index 05266ee49..661c0c658 100644 --- a/integration-tests/deploy_pyspark_sparkml_models.py +++ b/integration-tests/deploy_pyspark_sparkml_models.py @@ -115,7 +115,7 @@ def get_test_point(): pos_label = 3 import random - cluster_name = "pyspark-ml-cluster-{}".format(random.randint(0, 5000)) + cluster_name = "sparkml-{}".format(random.randint(0, 5000)) try: spark = SparkSession\ .builder\ diff --git a/integration-tests/deploy_pytorch_models.py b/integration-tests/deploy_pytorch_models.py index 4ac2dcc8a..e49357286 100644 --- a/integration-tests/deploy_pytorch_models.py +++ b/integration-tests/deploy_pytorch_models.py @@ -161,7 +161,7 @@ def __getitem__(self, index): pos_label = 3 import random - cluster_name = "pytorch-cluster-{}".format(random.randint(0, 5000)) + cluster_name = "torch-{}".format(random.randint(0, 5000)) try: clipper_conn = create_docker_connection( cleanup=False, start_clipper=True, new_name=cluster_name) diff --git a/integration-tests/deploy_pytorch_to_caffe2_with_onnx.py b/integration-tests/deploy_pytorch_to_caffe2_with_onnx.py index 4e978d376..6e81593ee 100644 --- a/integration-tests/deploy_pytorch_to_caffe2_with_onnx.py +++ b/integration-tests/deploy_pytorch_to_caffe2_with_onnx.py @@ -167,7 +167,7 @@ def __getitem__(self, index): pos_label = 3 import random - cluster_name = "onnx-cluster-{}".format(random.randint(0, 5000)) + cluster_name = "onnx-{}".format(random.randint(0, 5000)) try: clipper_conn = create_docker_connection( cleanup=False, start_clipper=True, new_name=cluster_name) diff --git a/integration-tests/deploy_tensorflow_models.py b/integration-tests/deploy_tensorflow_models.py index 16249dacb..b4f8f584e 100644 --- a/integration-tests/deploy_tensorflow_models.py +++ b/integration-tests/deploy_tensorflow_models.py @@ -150,7 +150,7 @@ def get_test_point(): pos_label = 3 import random - cluster_name = "tf-cluster-{}".format(random.randint(0, 5000)) + cluster_name = "tf-{}".format(random.randint(0, 5000)) try: sess = None clipper_conn = create_docker_connection( diff --git a/integration-tests/deploy_xgboost_models.py b/integration-tests/deploy_xgboost_models.py index 40e0ce620..2bb6fad32 100644 --- a/integration-tests/deploy_xgboost_models.py +++ b/integration-tests/deploy_xgboost_models.py @@ -83,7 +83,7 @@ def get_test_point(): pos_label = 3 import random - cluster_name = "xgboost-cluster-{}".format(random.randint(0, 5000)) + cluster_name = "xg-{}".format(random.randint(0, 5000)) try: clipper_conn = create_docker_connection( cleanup=False, start_clipper=True, new_name=cluster_name) diff --git a/integration-tests/kubernetes_integration_test.py b/integration-tests/kubernetes_integration_test.py index dfa906783..76f180d20 100644 --- a/integration-tests/kubernetes_integration_test.py +++ b/integration-tests/kubernetes_integration_test.py @@ -149,7 +149,7 @@ def test_kubernetes(clipper_conn, num_apps, num_models): # Test without proxy first import random - cluster_name = "k8s-test-cluster-{}".format(random.randint(0, 5000)) + cluster_name = "k8-{}".format(random.randint(0, 5000)) clipper_conn = create_kubernetes_connection( cleanup=False, diff --git a/integration-tests/kubernetes_multi_frontend.py b/integration-tests/kubernetes_multi_frontend.py index 9acc2134e..f3b3100f4 100644 --- a/integration-tests/kubernetes_multi_frontend.py +++ b/integration-tests/kubernetes_multi_frontend.py @@ -102,8 +102,7 @@ def create_and_test_app(clipper_conn, name): if __name__ == "__main__": import random - cluster_name = "kube-multi-frontend-cluster-{}".format( - random.randint(0, 5000)) + cluster_name = "k8-frontx-{}".format(random.randint(0, 5000)) try: clipper_conn = create_kubernetes_connection( cleanup=False, diff --git a/integration-tests/many_apps_many_models.py b/integration-tests/many_apps_many_models.py index d5a043c69..3c40672ae 100644 --- a/integration-tests/many_apps_many_models.py +++ b/integration-tests/many_apps_many_models.py @@ -94,12 +94,11 @@ def create_and_test_app(clipper_conn, name, num_models): if __name__ == "__main__": - num_apps = 6 - num_models = 8 + num_apps = 2 + num_models = 3 import random - cluster_name = "many-app-many-models-cluster-{}".format( - random.randint(0, 5000)) + cluster_name = "many-app-{}".format(random.randint(0, 5000)) try: if len(sys.argv) > 1: diff --git a/integration-tests/multi_tenancy_test.py b/integration-tests/multi_tenancy_test.py index d523484d0..bab9e25fd 100644 --- a/integration-tests/multi_tenancy_test.py +++ b/integration-tests/multi_tenancy_test.py @@ -12,8 +12,8 @@ def test(kubernetes): - conn_1 = create('multi-tenancy-test-cluster-1', use_kubernetes=kubernetes) - conn_2 = create('multi-tenancy-test-cluster-2', use_kubernetes=kubernetes) + conn_1 = create('multi-tenancy-1', use_kubernetes=kubernetes) + conn_2 = create('multi-tenancy-2', use_kubernetes=kubernetes) deploy_(conn_1, use_kubernetes=kubernetes) deploy_(conn_2, use_kubernetes=kubernetes) diff --git a/integration-tests/test_utils.py b/integration-tests/test_utils.py index a2a23dbcb..7527431e1 100644 --- a/integration-tests/test_utils.py +++ b/integration-tests/test_utils.py @@ -64,8 +64,8 @@ def find_unbound_port(): "randomly generated port %d is bound. Trying again." % port) -def create_docker_connection(cleanup=True, - start_clipper=True, +def create_docker_connection(cleanup=False, + start_clipper=False, cleanup_name='default-cluster', new_name='default-cluster'): logger.info("Creating DockerContainerManager") @@ -148,7 +148,8 @@ def create_kubernetes_connection(cleanup=False, cm = KubernetesContainerManager( cluster_name=new_name, kubernetes_proxy_addr=kubernetes_proxy_addr, - namespace=namespace) + namespace=namespace, + create_namespace_if_not_exists=True) cl = ClipperConnection(cm) cl.start_clipper( query_frontend_image= From c7a5b3f624c9a96909739903f425e8ce5f85624e Mon Sep 17 00:00:00 2001 From: simon-mo Date: Wed, 6 Jun 2018 06:32:18 +0000 Subject: [PATCH 32/63] Fix proxy addr --- .../kubernetes/kubernetes_container_manager.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index 6a125dc55..ee71e803e 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -518,9 +518,10 @@ def get_registry(self): def get_admin_addr(self): if self.use_k8s_proxy: return ("{proxy_addr}/api/v1/namespaces/{ns}/" - "services/mgmt-frontend:{port}/proxy").format( + "services/mgmt-frontend-at-{cluster}:{port}/proxy").format( proxy_addr=self.kubernetes_proxy_addr, ns=self.k8s_namespace, + cluster=self.cluster_name, port=CLIPPER_INTERNAL_MANAGEMENT_PORT) else: @@ -531,9 +532,10 @@ def get_admin_addr(self): def get_query_addr(self): if self.use_k8s_proxy: return ("{proxy_addr}/api/v1/namespaces/{ns}/" - "services/query-frontend:{port}/proxy").format( + "services/query-frontend-at-{cluster}:{port}/proxy").format( proxy_addr=self.kubernetes_proxy_addr, ns=self.k8s_namespace, + cluster=self.cluster_name, port=CLIPPER_INTERNAL_QUERY_PORT) else: return "{host}:{port}".format( @@ -542,9 +544,10 @@ def get_query_addr(self): def get_metric_addr(self): if self.use_k8s_proxy: return ("{proxy_addr}/api/v1/namespaces/{ns}/" - "services/metrics:{port}/proxy").format( + "services/metrics-at-{cluster}:{port}/proxy").format( proxy_addr=self.kubernetes_proxy_addr, ns=self.k8s_namespace, + cluster=self.cluster_name, port=CLIPPER_INTERNAL_METRIC_PORT) else: return "{host}:{port}".format( From ebe5208d362473f0423a94230a5e6f2b538b87f2 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 5 Jun 2018 23:35:49 -0700 Subject: [PATCH 33/63] Format code --- .../kubernetes/kubernetes_container_manager.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index ee71e803e..0d486e7ee 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -531,12 +531,13 @@ def get_admin_addr(self): def get_query_addr(self): if self.use_k8s_proxy: - return ("{proxy_addr}/api/v1/namespaces/{ns}/" - "services/query-frontend-at-{cluster}:{port}/proxy").format( - proxy_addr=self.kubernetes_proxy_addr, - ns=self.k8s_namespace, - cluster=self.cluster_name, - port=CLIPPER_INTERNAL_QUERY_PORT) + return ( + "{proxy_addr}/api/v1/namespaces/{ns}/" + "services/query-frontend-at-{cluster}:{port}/proxy").format( + proxy_addr=self.kubernetes_proxy_addr, + ns=self.k8s_namespace, + cluster=self.cluster_name, + port=CLIPPER_INTERNAL_QUERY_PORT) else: return "{host}:{port}".format( host=self.external_node_hosts[0], port=self.clipper_query_port) From 2270c526491cb22edf55d3417a27ec44d82ffb5b Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Wed, 6 Jun 2018 01:08:09 -0700 Subject: [PATCH 34/63] Wrap k8s metric test 2 in retry loop This is the only way to prevent some jenkins & mismatch --- integration-tests/clipper_metric_kube.py | 26 +++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/integration-tests/clipper_metric_kube.py b/integration-tests/clipper_metric_kube.py index 8c0403cfd..5a07c159e 100644 --- a/integration-tests/clipper_metric_kube.py +++ b/integration-tests/clipper_metric_kube.py @@ -182,11 +182,27 @@ def check_target_health(metric_addr): conf = conf['Model Container'] prefix = 'clipper_{}_'.format(conf.pop('prefix')) for name, spec in conf.items(): - name = prefix + name - if spec['type'] == 'Histogram' or spec['type'] == 'Summary': - name += '_sum' - res = get_matched_query(metric_api_addr, name) - parse_res_and_assert_node(res, node_num=2) + + retry_count = MAX_RETRY + while retry_count: + try: + name = prefix + name + if spec['type'] == 'Histogram' or spec['type'] == 'Summary': + name += '_sum' + res = get_matched_query(metric_api_addr, name) + parse_res_and_assert_node(res, node_num=2) + retry_count = 0 + except AssertionError as e: + logger.info( + "Exception noted. Will retry again in 10 seconds.") + logger.info(e) + retry_count -= 1 + if retry_count == 0: # a.k.a. the last retry + raise e + else: + time.sleep(10) + pass # try again. + logger.info("Test 2 Passed") # End Metric Check if not os.path.exists(CLIPPER_TEMP_DIR): From a7ccda2d1f8b4f26cbf0ff16906f22eef2f10683 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Wed, 6 Jun 2018 01:12:39 -0700 Subject: [PATCH 35/63] Clean up test --- integration-tests/clipper_metric_kube.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/integration-tests/clipper_metric_kube.py b/integration-tests/clipper_metric_kube.py index 5a07c159e..c8b082294 100644 --- a/integration-tests/clipper_metric_kube.py +++ b/integration-tests/clipper_metric_kube.py @@ -240,4 +240,9 @@ def check_target_health(metric_addr): sys.exit(1) except Exception as e: logger.exception("Exception: {}".format(e)) + create_kubernetes_connection( + cleanup=True, + start_clipper=False, + connect=False, + cleanup_name=cluster_name) sys.exit(1) From 3d37b5a86e70f94d37837fb9890d26cabba13ba9 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Wed, 6 Jun 2018 02:11:45 -0700 Subject: [PATCH 36/63] Format code --- clipper_admin/docs/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/clipper_admin/docs/conf.py b/clipper_admin/docs/conf.py index 2fe890638..431463de9 100644 --- a/clipper_admin/docs/conf.py +++ b/clipper_admin/docs/conf.py @@ -201,4 +201,5 @@ def setup(app): # }, True) # app.add_transform(AutoStructify) + autodoc_mock_imports = ['tensorflow', 'torch'] From caee0ba5c1032375aa91595f718007523b9e0a90 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Fri, 8 Jun 2018 15:27:58 -0700 Subject: [PATCH 37/63] Fix frontend exporter naming --- .../clipper_admin/kubernetes/query-frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clipper_admin/clipper_admin/kubernetes/query-frontend-deployment.yaml b/clipper_admin/clipper_admin/kubernetes/query-frontend-deployment.yaml index 13021e863..02487357a 100644 --- a/clipper_admin/clipper_admin/kubernetes/query-frontend-deployment.yaml +++ b/clipper_admin/clipper_admin/kubernetes/query-frontend-deployment.yaml @@ -32,7 +32,7 @@ spec: - containerPort: 1337 - args: - "--query_frontend_name" - - "query-frontend-at-{{ cluster_name }}:1337" + - "query-frontend:1337" image: {{ exporter_image }} name: frontend-exporter ports: From b447eb05d28c1ab2f23b03418af12e9273598a95 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Mon, 11 Jun 2018 14:36:11 -0700 Subject: [PATCH 38/63] Add readme to CI process --- bin/README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 bin/README.md diff --git a/bin/README.md b/bin/README.md new file mode 100644 index 000000000..7b48ba8b1 --- /dev/null +++ b/bin/README.md @@ -0,0 +1,21 @@ +# Description of CI process + +The wonderful AMPlab Jenkins is responsible for running our integration test. + +## How does the CI process work + + +0. Jenkins pull the PR and sandbox it. +1. Jenkins inject environment variables configured in admin page +2. Jenkins will call `run_ci.sh`. It does three things: + - It calls `build_docker_images.sh` to build all the docker images + - Then it runs unittests docker container for python2 and python3 + - Each unittest cotnainer will run `ci_checks.sh` +3. `ci_checks.sh` will run two things: + - (Only in Python2) It runs `check_foramt.sh` to run the linter for C++ and Python + - It runs `run_unnitests.sh` to run all tests. + - (Only in Python3) It will only run the integration test part, which contains all the python tests and R tests + +## Note on Minikube (WIP) +- We are under the process of moving away from AWS EKS to Minikube in our CI process. Once the PR is in, there will be +more detail here. \ No newline at end of file From 742d3daa468c6df78f1f6e031c8010267fac981c Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Mon, 11 Jun 2018 14:37:27 -0700 Subject: [PATCH 39/63] Change testapp name for ekr --- integration-tests/kubernetes_namespace.py | 4 ++-- integration-tests/multi_tenancy_test.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/integration-tests/kubernetes_namespace.py b/integration-tests/kubernetes_namespace.py index c648dac62..0a34d9668 100644 --- a/integration-tests/kubernetes_namespace.py +++ b/integration-tests/kubernetes_namespace.py @@ -40,14 +40,14 @@ def feature_sum(xs): def deploy_(clipper_conn): python_deployer.create_endpoint( clipper_conn, - "simple-example", + "testapp0-model", "doubles", feature_sum, registry=CLIPPER_CONTAINER_REGISTRY) def predict_(addr, x, batch=False): - url = "http://%s/simple-example/predict" % addr + url = "http://%s/testapp0-model/predict" % addr if batch: req_json = json.dumps({'input_batch': x}) diff --git a/integration-tests/multi_tenancy_test.py b/integration-tests/multi_tenancy_test.py index bab9e25fd..763ed1527 100644 --- a/integration-tests/multi_tenancy_test.py +++ b/integration-tests/multi_tenancy_test.py @@ -45,17 +45,17 @@ def deploy_(clipper_conn, use_kubernetes=False): if use_kubernetes: python_deployer.create_endpoint( clipper_conn, - "simple-example", + "testapp0-model", "doubles", feature_sum, registry=CLIPPER_CONTAINER_REGISTRY) else: - python_deployer.create_endpoint(clipper_conn, "simple-example", + python_deployer.create_endpoint(clipper_conn, "testapp0-model", "doubles", feature_sum) def predict_(addr, x, batch=False): - url = "http://%s/simple-example/predict" % addr + url = "http://%s/testapp0-model/predict" % addr if batch: req_json = json.dumps({'input_batch': x}) From 83c7c6713daf22434ce78ac49cb2e8857ebc9d15 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Mon, 11 Jun 2018 15:59:33 -0700 Subject: [PATCH 40/63] Swap the python closure build step --- clipper_admin/clipper_admin/clipper_admin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clipper_admin/clipper_admin/clipper_admin.py b/clipper_admin/clipper_admin/clipper_admin.py index 307fdfe48..836a700ed 100644 --- a/clipper_admin/clipper_admin/clipper_admin.py +++ b/clipper_admin/clipper_admin/clipper_admin.py @@ -423,7 +423,7 @@ def build_model(self, try: df_contents = StringIO( str.encode( - "FROM {container_name}\nCOPY {data_path} /model/\n{run_command}\n". + "FROM {container_name}\n{run_command}\nCOPY {data_path} /model/\n". format( container_name=base_image, data_path=model_data_path, From ffc321eb051d049e45b6ff8795131cf7dec7a1da Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Wed, 13 Jun 2018 15:58:45 -0700 Subject: [PATCH 41/63] Add __registry__ argument --- bin/build_docker_images.sh | 7 ++++++- bin/run_ci.sh | 9 +++------ clipper_admin/clipper_admin/__init__.py | 2 +- clipper_admin/clipper_admin/clipper_admin.py | 20 +++++++++++++------ .../clipper_admin/container_manager.py | 2 +- .../clipper_admin/deployers/mxnet.py | 8 ++++---- clipper_admin/clipper_admin/deployers/onnx.py | 4 ++-- .../clipper_admin/deployers/pyspark.py | 8 ++++---- .../clipper_admin/deployers/python.py | 14 ++++++------- .../clipper_admin/deployers/pytorch.py | 13 ++++++------ .../clipper_admin/deployers/tensorflow.py | 8 ++++---- .../docker/docker_container_manager.py | 3 ++- .../docker/docker_metric_utils.py | 5 ++--- .../kubernetes_container_manager.py | 7 ++++--- .../kubernetes/kubernetes_metric_utils.py | 4 ---- .../clipper_admin/metrics/__init__.py | 1 - clipper_admin/clipper_admin/version.py | 2 ++ dockerfiles/ClipperPy35TestsDockerfile | 4 ++++ dockerfiles/ClipperTestsDockerfile | 4 ++++ integration-tests/test_utils.py | 12 +---------- 20 files changed, 72 insertions(+), 65 deletions(-) diff --git a/bin/build_docker_images.sh b/bin/build_docker_images.sh index 50b3f4c60..7740ad18d 100755 --- a/bin/build_docker_images.sh +++ b/bin/build_docker_images.sh @@ -213,7 +213,12 @@ create_image () { echo "Building $namespace/$image:$sha_tag from file $dockerfile" time docker build --build-arg CODE_VERSION=$sha_tag $rpc_version -t $namespace/$image:$sha_tag \ -f dockerfiles/$dockerfile $CLIPPER_ROOT - docker tag $namespace/$image:$sha_tag $namespace/$image:$version_tag + + echo "Publishing $namespace/$image:$sha_tag from file $dockerfile" + docker push $namespace/$image:$sha_tag + + # We will NOT tag the image to version tag to prevent collision + # docker tag $namespace/$image:$sha_tag $namespace/$image:$version_tag if [ "$publish" = true ] && [ "$public" = true ] ; then echo "Publishing $namespace/$image:$sha_tag" diff --git a/bin/run_ci.sh b/bin/run_ci.sh index 6af56f434..abb3de6de 100755 --- a/bin/run_ci.sh +++ b/bin/run_ci.sh @@ -21,12 +21,7 @@ tag=$( VERSION.txt + RUN pip3 install -e /clipper/clipper_admin ENTRYPOINT ["/clipper/bin/ci_checks.sh", "false"] diff --git a/dockerfiles/ClipperTestsDockerfile b/dockerfiles/ClipperTestsDockerfile index 459354228..1f9500629 100644 --- a/dockerfiles/ClipperTestsDockerfile +++ b/dockerfiles/ClipperTestsDockerfile @@ -11,6 +11,10 @@ COPY ./ /clipper RUN cd /clipper/src/libs/spdlog \ && git apply ../patches/make_spdlog_compile_linux.patch +# Set version as git hash +RUN cd /clipper \ + && echo $(git rev-parse --verify --short HEAD) > VERSION.txt + RUN pip install -e /clipper/clipper_admin ENTRYPOINT ["/clipper/bin/ci_checks.sh", "true"] diff --git a/integration-tests/test_utils.py b/integration-tests/test_utils.py index 7527431e1..fe89e80fb 100644 --- a/integration-tests/test_utils.py +++ b/integration-tests/test_utils.py @@ -14,7 +14,6 @@ KubernetesContainerManager, CLIPPER_TEMP_DIR, ClipperException) from clipper_admin.container_manager import CLIPPER_DOCKER_LABEL -from clipper_admin import __version__ as clipper_version logger = logging.getLogger(__name__) @@ -36,8 +35,6 @@ def __str__(self): # range of ports where available ports can be found PORT_RANGE = [34256, 50000] -CLIPPER_CONTAINER_REGISTRY = "568959175238.dkr.ecr.us-west-1.amazonaws.com/clipper" - def get_docker_client(): if "DOCKER_API_VERSION" in os.environ: @@ -151,14 +148,7 @@ def create_kubernetes_connection(cleanup=False, namespace=namespace, create_namespace_if_not_exists=True) cl = ClipperConnection(cm) - cl.start_clipper( - query_frontend_image= - "568959175238.dkr.ecr.us-west-1.amazonaws.com/clipper/query_frontend:{}". - format(clipper_version), - mgmt_frontend_image= - "568959175238.dkr.ecr.us-west-1.amazonaws.com/clipper/management_frontend:{}". - format(clipper_version), - num_frontend_replicas=num_frontend_replicas) + cl.start_clipper(num_frontend_replicas=num_frontend_replicas) if connect: try: From 105f0ef6e18efd90b6aa3ad13db41e107a6c3e95 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Wed, 13 Jun 2018 16:26:25 -0700 Subject: [PATCH 42/63] Add registry to docker build process --- bin/build_docker_images.sh | 4 ++-- dockerfiles/Caffe2OnnxDockerfile | 3 ++- dockerfiles/ClipperDevDockerfile | 3 ++- dockerfiles/ClipperLibBaseDockerfile | 1 + dockerfiles/ClipperPy35DevDockerfile | 3 ++- dockerfiles/ClipperPy35TestsDockerfile | 3 ++- dockerfiles/ClipperTestsDockerfile | 3 ++- dockerfiles/FrontendExporterDockerfile | 1 + dockerfiles/MXNetContainerDockerfile | 3 ++- dockerfiles/ManagementFrontendDockerfile | 3 ++- dockerfiles/NoopBenchDockerfile | 3 ++- dockerfiles/NoopDockerfile | 3 ++- dockerfiles/Py2RPCDockerfile | 1 + dockerfiles/Py35RPCDockerfile | 1 + dockerfiles/Py36RPCDockerfile | 1 + dockerfiles/PyClosureContainerDockerfile | 3 ++- dockerfiles/PySparkContainerDockerfile | 3 ++- dockerfiles/PyTorchContainerDockerfile | 3 ++- dockerfiles/QueryFrontendDockerfile | 3 ++- dockerfiles/RContainerDockerfile | 1 + dockerfiles/SparkScalaContainerDockerfile | 1 + dockerfiles/SumBenchDockerfile | 3 ++- dockerfiles/SumDockerfile | 3 ++- dockerfiles/TensorFlowDockerfile | 3 ++- 24 files changed, 41 insertions(+), 18 deletions(-) diff --git a/bin/build_docker_images.sh b/bin/build_docker_images.sh index 7740ad18d..61c7dfe0d 100755 --- a/bin/build_docker_images.sh +++ b/bin/build_docker_images.sh @@ -189,7 +189,7 @@ set_version_tag () { set_version_tag -namespace="clipper" +namespace=$(docker info | grep Username | awk '{ print $2 }') # We build images with the SHA tag to try to prevent clobbering other images # being built from different branches on the same machine. This is particularly @@ -211,7 +211,7 @@ create_image () { echo "Building $namespace/$image:$sha_tag from file $dockerfile" - time docker build --build-arg CODE_VERSION=$sha_tag $rpc_version -t $namespace/$image:$sha_tag \ + time docker build --build-arg CODE_VERSION=$sha_tag --build-arg REGISTRY=$namespace $rpc_version -t $namespace/$image:$sha_tag \ -f dockerfiles/$dockerfile $CLIPPER_ROOT echo "Publishing $namespace/$image:$sha_tag from file $dockerfile" diff --git a/dockerfiles/Caffe2OnnxDockerfile b/dockerfiles/Caffe2OnnxDockerfile index 9b28f7bce..d6f918db1 100644 --- a/dockerfiles/Caffe2OnnxDockerfile +++ b/dockerfiles/Caffe2OnnxDockerfile @@ -1,6 +1,7 @@ +ARG REGISTRY ARG CODE_VERSION ARG RPC_VERSION -FROM clipper/${RPC_VERSION}-rpc:${CODE_VERSION} +FROM ${REGISTRY}/${RPC_VERSION}-rpc:${CODE_VERSION} LABEL maintainer="Dan Crankshaw " diff --git a/dockerfiles/ClipperDevDockerfile b/dockerfiles/ClipperDevDockerfile index dbad27e29..48e7851d5 100644 --- a/dockerfiles/ClipperDevDockerfile +++ b/dockerfiles/ClipperDevDockerfile @@ -1,5 +1,6 @@ +ARG REGISTRY ARG CODE_VERSION -FROM clipper/lib_base:${CODE_VERSION} +FROM ${REGISTRY}/lib_base:${CODE_VERSION} LABEL maintainer="Dan Crankshaw " diff --git a/dockerfiles/ClipperLibBaseDockerfile b/dockerfiles/ClipperLibBaseDockerfile index 2110c1086..575b8601b 100644 --- a/dockerfiles/ClipperLibBaseDockerfile +++ b/dockerfiles/ClipperLibBaseDockerfile @@ -1,3 +1,4 @@ +ARG REGISTRY # This ARG isn't used but prevents warnings in the build script ARG CODE_VERSION FROM debian:stretch-slim diff --git a/dockerfiles/ClipperPy35DevDockerfile b/dockerfiles/ClipperPy35DevDockerfile index e1685e8da..b13c63403 100644 --- a/dockerfiles/ClipperPy35DevDockerfile +++ b/dockerfiles/ClipperPy35DevDockerfile @@ -1,5 +1,6 @@ +ARG REGISTRY ARG CODE_VERSION -FROM clipper/lib_base:${CODE_VERSION} +FROM ${REGISTRY}/lib_base:${CODE_VERSION} LABEL maintainer="Dan Crankshaw " diff --git a/dockerfiles/ClipperPy35TestsDockerfile b/dockerfiles/ClipperPy35TestsDockerfile index e8a69e196..ce9612671 100644 --- a/dockerfiles/ClipperPy35TestsDockerfile +++ b/dockerfiles/ClipperPy35TestsDockerfile @@ -1,5 +1,6 @@ +ARG REGISTRY ARG CODE_VERSION -FROM clipper/py35-dev:${CODE_VERSION} +FROM ${REGISTRY}/py35-dev:${CODE_VERSION} LABEL maintainer="Dan Crankshaw " diff --git a/dockerfiles/ClipperTestsDockerfile b/dockerfiles/ClipperTestsDockerfile index 1f9500629..ce3d01996 100644 --- a/dockerfiles/ClipperTestsDockerfile +++ b/dockerfiles/ClipperTestsDockerfile @@ -1,5 +1,6 @@ +ARG REGISTRY ARG CODE_VERSION -FROM clipper/dev:${CODE_VERSION} +FROM ${REGISTRY}/dev:${CODE_VERSION} LABEL maintainer="Dan Crankshaw " diff --git a/dockerfiles/FrontendExporterDockerfile b/dockerfiles/FrontendExporterDockerfile index 29e47f759..8bab7c247 100644 --- a/dockerfiles/FrontendExporterDockerfile +++ b/dockerfiles/FrontendExporterDockerfile @@ -1,3 +1,4 @@ +ARG REGISTRY # This ARG isn't used but prevents warnings in the build script ARG CODE_VERSION FROM python:3.6-slim-stretch diff --git a/dockerfiles/MXNetContainerDockerfile b/dockerfiles/MXNetContainerDockerfile index 0cdf19275..3c1b0ac8a 100644 --- a/dockerfiles/MXNetContainerDockerfile +++ b/dockerfiles/MXNetContainerDockerfile @@ -1,6 +1,7 @@ +ARG REGISTRY ARG CODE_VERSION ARG RPC_VERSION -FROM clipper/${RPC_VERSION}-rpc:${CODE_VERSION} +FROM ${REGISTRY}/${RPC_VERSION}-rpc:${CODE_VERSION} LABEL maintainer="Dan Crankshaw " diff --git a/dockerfiles/ManagementFrontendDockerfile b/dockerfiles/ManagementFrontendDockerfile index abfd9e22b..4ea5bc5c9 100644 --- a/dockerfiles/ManagementFrontendDockerfile +++ b/dockerfiles/ManagementFrontendDockerfile @@ -1,5 +1,6 @@ +ARG REGISTRY ARG CODE_VERSION -FROM clipper/lib_base:${CODE_VERSION} +FROM ${REGISTRY}/lib_base:${CODE_VERSION} LABEL maintainer="Dan Crankshaw " diff --git a/dockerfiles/NoopBenchDockerfile b/dockerfiles/NoopBenchDockerfile index ff9afa62b..c45e0de63 100644 --- a/dockerfiles/NoopBenchDockerfile +++ b/dockerfiles/NoopBenchDockerfile @@ -1,5 +1,6 @@ +ARG REGISTRY ARG CODE_VERSION -FROM clipper/py-rpc:${CODE_VERSION} +FROM ${REGISTRY}/py-rpc:${CODE_VERSION} LABEL maintainer="Dan Crankshaw " diff --git a/dockerfiles/NoopDockerfile b/dockerfiles/NoopDockerfile index 6236295af..bd60adba2 100644 --- a/dockerfiles/NoopDockerfile +++ b/dockerfiles/NoopDockerfile @@ -1,5 +1,6 @@ +ARG REGISTRY ARG CODE_VERSION -FROM clipper/py-rpc:${CODE_VERSION} +FROM ${REGISTRY}/py-rpc:${CODE_VERSION} LABEL maintainer="Dan Crankshaw " diff --git a/dockerfiles/Py2RPCDockerfile b/dockerfiles/Py2RPCDockerfile index 1c1b4f7c3..388c45693 100644 --- a/dockerfiles/Py2RPCDockerfile +++ b/dockerfiles/Py2RPCDockerfile @@ -1,3 +1,4 @@ +ARG REGISTRY # This ARG isn't used but prevents warnings in the build script ARG CODE_VERSION FROM python:2.7.14-slim-stretch diff --git a/dockerfiles/Py35RPCDockerfile b/dockerfiles/Py35RPCDockerfile index 7b42c63d7..ef7f6d9c5 100644 --- a/dockerfiles/Py35RPCDockerfile +++ b/dockerfiles/Py35RPCDockerfile @@ -1,3 +1,4 @@ +ARG REGISTRY # This ARG isn't used but prevents warnings in the build script ARG CODE_VERSION FROM python:3.5-slim-jessie diff --git a/dockerfiles/Py36RPCDockerfile b/dockerfiles/Py36RPCDockerfile index c4e27ebd9..189a40503 100644 --- a/dockerfiles/Py36RPCDockerfile +++ b/dockerfiles/Py36RPCDockerfile @@ -1,3 +1,4 @@ +ARG REGISTRY # This ARG isn't used but prevents warnings in the build script ARG CODE_VERSION FROM python:3.6-slim-stretch diff --git a/dockerfiles/PyClosureContainerDockerfile b/dockerfiles/PyClosureContainerDockerfile index d3b334490..2a3f831a6 100644 --- a/dockerfiles/PyClosureContainerDockerfile +++ b/dockerfiles/PyClosureContainerDockerfile @@ -1,6 +1,7 @@ +ARG REGISTRY ARG CODE_VERSION ARG RPC_VERSION -FROM clipper/${RPC_VERSION}-rpc:${CODE_VERSION} +FROM ${REGISTRY}/${RPC_VERSION}-rpc:${CODE_VERSION} LABEL maintainer="Dan Crankshaw " diff --git a/dockerfiles/PySparkContainerDockerfile b/dockerfiles/PySparkContainerDockerfile index ea66f08c1..fdf61f2ee 100644 --- a/dockerfiles/PySparkContainerDockerfile +++ b/dockerfiles/PySparkContainerDockerfile @@ -1,6 +1,7 @@ +ARG REGISTRY ARG CODE_VERSION ARG RPC_VERSION -FROM clipper/${RPC_VERSION}-rpc:${CODE_VERSION} +FROM ${REGISTRY}/${RPC_VERSION}-rpc:${CODE_VERSION} LABEL maintainer="Dan Crankshaw " diff --git a/dockerfiles/PyTorchContainerDockerfile b/dockerfiles/PyTorchContainerDockerfile index afadf46fe..f5ef18586 100644 --- a/dockerfiles/PyTorchContainerDockerfile +++ b/dockerfiles/PyTorchContainerDockerfile @@ -1,6 +1,7 @@ +ARG REGISTRY ARG CODE_VERSION ARG RPC_VERSION -FROM clipper/${RPC_VERSION}-rpc:${CODE_VERSION} +FROM ${REGISTRY}/${RPC_VERSION}-rpc:${CODE_VERSION} LABEL maintainer="Dan Crankshaw " diff --git a/dockerfiles/QueryFrontendDockerfile b/dockerfiles/QueryFrontendDockerfile index b07dc8f86..df52e7124 100644 --- a/dockerfiles/QueryFrontendDockerfile +++ b/dockerfiles/QueryFrontendDockerfile @@ -1,5 +1,6 @@ +ARG REGISTRY ARG CODE_VERSION -FROM clipper/lib_base:${CODE_VERSION} +FROM ${REGISTRY}/lib_base:${CODE_VERSION} LABEL maintainer="Dan Crankshaw " diff --git a/dockerfiles/RContainerDockerfile b/dockerfiles/RContainerDockerfile index 46647ce80..80c4d875e 100644 --- a/dockerfiles/RContainerDockerfile +++ b/dockerfiles/RContainerDockerfile @@ -1,3 +1,4 @@ +ARG REGISTRY # This ARG isn't used but prevents warnings in the build script ARG CODE_VERSION FROM r-base:3.4.4 diff --git a/dockerfiles/SparkScalaContainerDockerfile b/dockerfiles/SparkScalaContainerDockerfile index b26e1566d..83dcbe425 100644 --- a/dockerfiles/SparkScalaContainerDockerfile +++ b/dockerfiles/SparkScalaContainerDockerfile @@ -1,3 +1,4 @@ +ARG REGISTRY # This ARG isn't used but prevents warnings in the build script ARG CODE_VERSION FROM openjdk:8-jdk diff --git a/dockerfiles/SumBenchDockerfile b/dockerfiles/SumBenchDockerfile index d3080afaa..4641d4716 100644 --- a/dockerfiles/SumBenchDockerfile +++ b/dockerfiles/SumBenchDockerfile @@ -1,5 +1,6 @@ +ARG REGISTRY ARG CODE_VERSION -FROM clipper/py-rpc:${CODE_VERSION} +FROM ${REGISTRY}/py-rpc:${CODE_VERSION} LABEL maintainer="Dan Crankshaw " diff --git a/dockerfiles/SumDockerfile b/dockerfiles/SumDockerfile index 49db38ce8..0edaf801d 100644 --- a/dockerfiles/SumDockerfile +++ b/dockerfiles/SumDockerfile @@ -1,5 +1,6 @@ +ARG REGISTRY ARG CODE_VERSION -FROM clipper/py-rpc:${CODE_VERSION} +FROM ${REGISTRY}/py-rpc:${CODE_VERSION} LABEL maintainer="Dan Crankshaw " diff --git a/dockerfiles/TensorFlowDockerfile b/dockerfiles/TensorFlowDockerfile index 0c72d6500..7c5f67844 100644 --- a/dockerfiles/TensorFlowDockerfile +++ b/dockerfiles/TensorFlowDockerfile @@ -1,6 +1,7 @@ +ARG REGISTRY ARG CODE_VERSION ARG RPC_VERSION -FROM clipper/${RPC_VERSION}-rpc:${CODE_VERSION} +FROM ${REGISTRY}/${RPC_VERSION}-rpc:${CODE_VERSION} LABEL maintainer="Dan Crankshaw " From e5eae35e100abac175d7ede02cd1348052226df4 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Wed, 13 Jun 2018 16:57:05 -0700 Subject: [PATCH 43/63] Add cluster info to image name --- clipper_admin/clipper_admin/clipper_admin.py | 4 ++-- .../clipper_admin/kubernetes/kubernetes_container_manager.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clipper_admin/clipper_admin/clipper_admin.py b/clipper_admin/clipper_admin/clipper_admin.py index b267a2a42..4db4c5dab 100644 --- a/clipper_admin/clipper_admin/clipper_admin.py +++ b/clipper_admin/clipper_admin/clipper_admin.py @@ -443,7 +443,7 @@ def build_model(self, context_tar.addfile(df_tarinfo, df_contents) except TypeError: df_contents = StringIO( - "FROM {container_name}\nCOPY {data_path} /model/\n{run_command}\n". + "FROM {container_name}\n{run_command}\nCOPY {data_path} /model/\n". format( container_name=base_image, data_path=model_data_path, @@ -456,7 +456,7 @@ def build_model(self, # Exit Tarfile context manager to finish the tar file # Seek back to beginning of file for reading context_file.seek(0) - image = "{name}:{version}".format(name=name, version=version) + image = "{cluster}-{name}:{version}".format(cluster=self.cm.cluster_identifier, name=name, version=version) if container_registry is not None: image = "{reg}/{image}".format( reg=container_registry, image=image) diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index 867e90a9a..088ec1395 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -161,7 +161,7 @@ def __init__(self, raise ClipperException(msg) self.cluster_identifier = "{ns}{cluster}".format( - ns=self.k8s_namespace + '/' + ns=self.k8s_namespace + '-' if self.k8s_namespace != "default" else "", cluster=self.cluster_name) self.logger = ClusterAdapter(logger, { From 705394af239f93795e644570c9a8dcd042e31d36 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Wed, 13 Jun 2018 17:04:22 -0700 Subject: [PATCH 44/63] Format code --- clipper_admin/clipper_admin/clipper_admin.py | 25 ++++++++++--------- .../clipper_admin/container_manager.py | 3 ++- .../clipper_admin/deployers/mxnet.py | 9 ++++--- clipper_admin/clipper_admin/deployers/onnx.py | 3 ++- .../clipper_admin/deployers/pyspark.py | 9 ++++--- .../clipper_admin/deployers/tensorflow.py | 6 +++-- .../docker/docker_container_manager.py | 3 ++- .../docker/docker_metric_utils.py | 3 ++- .../kubernetes_container_manager.py | 7 +++--- clipper_admin/setup.py | 6 ++--- 10 files changed, 44 insertions(+), 30 deletions(-) diff --git a/clipper_admin/clipper_admin/clipper_admin.py b/clipper_admin/clipper_admin/clipper_admin.py index 4db4c5dab..e40c67ae9 100644 --- a/clipper_admin/clipper_admin/clipper_admin.py +++ b/clipper_admin/clipper_admin/clipper_admin.py @@ -83,16 +83,15 @@ def __init__(self, container_manager): 'cluster_name': self.cm.cluster_identifier }) - def start_clipper( - self, - query_frontend_image='{}/query_frontend:{}'.format( - __registry__, __version__), - mgmt_frontend_image='{}/management_frontend:{}'.format( - __registry__, __version__), - frontend_exporter_image='{}/frontend-exporter:{}'.format( - __registry__, __version__), - cache_size=DEFAULT_PREDICTION_CACHE_SIZE_BYTES, - num_frontend_replicas=1): + def start_clipper(self, + query_frontend_image='{}/query_frontend:{}'.format( + __registry__, __version__), + mgmt_frontend_image='{}/management_frontend:{}'.format( + __registry__, __version__), + frontend_exporter_image='{}/frontend-exporter:{}'.format( + __registry__, __version__), + cache_size=DEFAULT_PREDICTION_CACHE_SIZE_BYTES, + num_frontend_replicas=1): """Start a new Clipper cluster and connect to it. This command will start a new Clipper instance using the container manager provided when @@ -123,7 +122,8 @@ def start_clipper( """ try: self.cm.start_clipper(query_frontend_image, mgmt_frontend_image, - frontend_exporter_image, cache_size, num_frontend_replicas) + frontend_exporter_image, cache_size, + num_frontend_replicas) while True: try: url = "http://{host}/metrics".format( @@ -456,7 +456,8 @@ def build_model(self, # Exit Tarfile context manager to finish the tar file # Seek back to beginning of file for reading context_file.seek(0) - image = "{cluster}-{name}:{version}".format(cluster=self.cm.cluster_identifier, name=name, version=version) + image = "{cluster}-{name}:{version}".format( + cluster=self.cm.cluster_identifier, name=name, version=version) if container_registry is not None: image = "{reg}/{image}".format( reg=container_registry, image=image) diff --git a/clipper_admin/clipper_admin/container_manager.py b/clipper_admin/clipper_admin/container_manager.py index 83cf03411..b03687adb 100644 --- a/clipper_admin/clipper_admin/container_manager.py +++ b/clipper_admin/clipper_admin/container_manager.py @@ -65,7 +65,8 @@ class ContainerManager(object): @abc.abstractmethod def start_clipper(self, query_frontend_image, mgmt_frontend_image, - frontend_exporter_image, cache_size, num_frontend_replicas): + frontend_exporter_image, cache_size, + num_frontend_replicas): # NOTE: An implementation of this interface should be connected to a running # Clipper instance when this method returns. ClipperConnection will not # call ContainerManager.connect() separately after calling start_clipper(), so diff --git a/clipper_admin/clipper_admin/deployers/mxnet.py b/clipper_admin/clipper_admin/deployers/mxnet.py index 27408818d..51f9de053 100644 --- a/clipper_admin/clipper_admin/deployers/mxnet.py +++ b/clipper_admin/clipper_admin/deployers/mxnet.py @@ -254,13 +254,16 @@ def deploy_mxnet_model(clipper_conn, if base_image == "default": if py_minor_version < (3, 0): logger.info("Using Python 2 base image") - base_image = "{}/mxnet-container:{}".format(__registry__, __version__) + base_image = "{}/mxnet-container:{}".format( + __registry__, __version__) elif py_minor_version == (3, 5): logger.info("Using Python 3.5 base image") - base_image = "{}/mxnet35-container:{}".format(__registry__, __version__) + base_image = "{}/mxnet35-container:{}".format( + __registry__, __version__) elif py_minor_version == (3, 6): logger.info("Using Python 3.6 base image") - base_image = "{}/mxnet36-container:{}".format(__registry__, __version__) + base_image = "{}/mxnet36-container:{}".format( + __registry__, __version__) else: msg = ( "MXNet deployer only supports Python 2.7, 3.5, and 3.6. " diff --git a/clipper_admin/clipper_admin/deployers/onnx.py b/clipper_admin/clipper_admin/deployers/onnx.py index 593399b07..0dcaabe5a 100644 --- a/clipper_admin/clipper_admin/deployers/onnx.py +++ b/clipper_admin/clipper_admin/deployers/onnx.py @@ -175,7 +175,8 @@ def deploy_pytorch_model(clipper_conn, if base_image is None: if onnx_backend is "caffe2": - base_image = "{}/caffe2-onnx-container:{}".format(__registry__, __version__) + base_image = "{}/caffe2-onnx-container:{}".format( + __registry__, __version__) else: logger.error( "{backend} ONNX backend is not currently supported.".format( diff --git a/clipper_admin/clipper_admin/deployers/pyspark.py b/clipper_admin/clipper_admin/deployers/pyspark.py index e149207c2..e3fe50510 100644 --- a/clipper_admin/clipper_admin/deployers/pyspark.py +++ b/clipper_admin/clipper_admin/deployers/pyspark.py @@ -238,13 +238,16 @@ def predict(spark, model, inputs): if base_image == "default": if py_minor_version < (3, 0): logger.info("Using Python 2 base image") - base_image = "{}/pyspark-container:{}".format(__registry__, __version__) + base_image = "{}/pyspark-container:{}".format( + __registry__, __version__) elif py_minor_version == (3, 5): logger.info("Using Python 3.5 base image") - base_image = "{}/pyspark35-container:{}".format(__registry__, __version__) + base_image = "{}/pyspark35-container:{}".format( + __registry__, __version__) elif py_minor_version == (3, 6): logger.info("Using Python 3.6 base image") - base_image = "{}/pyspark36-container:{}".format(__registry__, __version__) + base_image = "{}/pyspark36-container:{}".format( + __registry__, __version__) else: msg = ("PySpark deployer only supports Python 2.7, 3.5, and 3.6. " "Detected {major}.{minor}").format( diff --git a/clipper_admin/clipper_admin/deployers/tensorflow.py b/clipper_admin/clipper_admin/deployers/tensorflow.py index 327697dfc..1ff45eb68 100644 --- a/clipper_admin/clipper_admin/deployers/tensorflow.py +++ b/clipper_admin/clipper_admin/deployers/tensorflow.py @@ -271,10 +271,12 @@ def predict(sess, inputs): base_image = "{}/tf-container:{}".format(__registry__, __version__) elif py_minor_version == (3, 5): logger.info("Using Python 3.5 base image") - base_image = "{}/tf35-container:{}".format(__registry__, __version__) + base_image = "{}/tf35-container:{}".format(__registry__, + __version__) elif py_minor_version == (3, 6): logger.info("Using Python 3.6 base image") - base_image = "{}/tf36-container:{}".format(__registry__, __version__) + base_image = "{}/tf36-container:{}".format(__registry__, + __version__) else: msg = ( "TensorFlow deployer only supports Python 2.7, 3.5, and 3.6. " diff --git a/clipper_admin/clipper_admin/docker/docker_container_manager.py b/clipper_admin/clipper_admin/docker/docker_container_manager.py index d393da0f4..2b6ddf827 100644 --- a/clipper_admin/clipper_admin/docker/docker_container_manager.py +++ b/clipper_admin/clipper_admin/docker/docker_container_manager.py @@ -218,7 +218,8 @@ def start_clipper(self, query_container_id) run_query_frontend_metric_image( query_frontend_metric_name, self.docker_client, query_name, - frontend_exporter_image, self.common_labels, self.extra_container_kwargs) + frontend_exporter_image, self.common_labels, + self.extra_container_kwargs) self.prom_config_path = tempfile.NamedTemporaryFile( 'w', suffix='.yml', delete=False).name diff --git a/clipper_admin/clipper_admin/docker/docker_metric_utils.py b/clipper_admin/clipper_admin/docker/docker_metric_utils.py index b25fdb508..e528edb44 100644 --- a/clipper_admin/clipper_admin/docker/docker_metric_utils.py +++ b/clipper_admin/clipper_admin/docker/docker_metric_utils.py @@ -19,7 +19,8 @@ def get_prometheus_base_config(): def run_query_frontend_metric_image(name, docker_client, query_name, - frontend_exporter_image, common_labels, extra_container_kwargs): + frontend_exporter_image, common_labels, + extra_container_kwargs): """ Use docker_client to run a frontend-exporter image. :param name: Name to pass in, need to be unique. diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index 088ec1395..8dde97d2d 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -177,8 +177,8 @@ def start_clipper(self, self._start_redis() self._start_mgmt(mgmt_frontend_image) self.num_frontend_replicas = num_frontend_replicas - self._start_query(query_frontend_image, frontend_exporter_image, cache_size, - num_frontend_replicas) + self._start_query(query_frontend_image, frontend_exporter_image, + cache_size, num_frontend_replicas) self._start_prometheus() self.connect() @@ -222,7 +222,8 @@ def _start_mgmt(self, mgmt_image): self._k8s_v1.create_namespaced_service( body=mgmt_service_data, namespace=self.k8s_namespace) - def _start_query(self, query_image, frontend_exporter_image, cache_size, num_replicas): + def _start_query(self, query_image, frontend_exporter_image, cache_size, + num_replicas): for query_frontend_id in range(num_replicas): with _pass_conflicts(): query_deployment_data = self._generate_config( diff --git a/clipper_admin/setup.py b/clipper_admin/setup.py index ffe05d988..aeda894d4 100644 --- a/clipper_admin/setup.py +++ b/clipper_admin/setup.py @@ -24,9 +24,9 @@ keywords=['clipper', 'prediction', 'model', 'management'], install_requires=[ 'requests', 'numpy', 'subprocess32; python_version<"3"', 'pyyaml', - 'docker>=3.0', 'kubernetes>=6.0.0', 'prometheus_client', 'cloudpickle>=0.5', - 'enum34; python_version<"3.4"', 'redis', 'psutil', 'jsonschema', - 'jinja2' + 'docker>=3.0', 'kubernetes>=6.0.0', 'prometheus_client', + 'cloudpickle>=0.5', 'enum34; python_version<"3.4"', 'redis', 'psutil', + 'jsonschema', 'jinja2' ], extras_require={ 'PySpark': ['pyspark'], From 4c2d8d2b977c2ddc848a3fa9fea4b9bbad4c5508 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Wed, 13 Jun 2018 17:30:37 -0700 Subject: [PATCH 45/63] Run unittests in clippertest --- bin/run_ci.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/run_ci.sh b/bin/run_ci.sh index abb3de6de..faa5b6fa2 100755 --- a/bin/run_ci.sh +++ b/bin/run_ci.sh @@ -21,7 +21,7 @@ tag=$( Date: Thu, 14 Jun 2018 16:11:29 -0700 Subject: [PATCH 46/63] Address comment --- bin/README.md | 12 ++++++++++-- bin/build_docker_images.sh | 12 +++++++----- bin/run_ci.sh | 9 +++++++-- .../kubernetes/kubernetes_container_manager.py | 18 +++++++++--------- dockerfiles/ClipperLibBaseDockerfile | 2 +- dockerfiles/FrontendExporterDockerfile | 2 +- dockerfiles/Py2RPCDockerfile | 2 +- dockerfiles/Py35RPCDockerfile | 2 +- dockerfiles/Py36RPCDockerfile | 2 +- dockerfiles/RContainerDockerfile | 2 +- dockerfiles/SparkScalaContainerDockerfile | 2 +- 11 files changed, 40 insertions(+), 25 deletions(-) diff --git a/bin/README.md b/bin/README.md index 7b48ba8b1..b86fc2734 100644 --- a/bin/README.md +++ b/bin/README.md @@ -5,8 +5,16 @@ The wonderful AMPlab Jenkins is responsible for running our integration test. ## How does the CI process work -0. Jenkins pull the PR and sandbox it. -1. Jenkins inject environment variables configured in admin page +0. Jenkins pull the PR. +1. Jenkins inject environment variables configured in admin page. Currently, we set the following environment variables +in Pull Request Builder: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - CLIPPER_K8S_CERT_AUTH + - CLIPPER_K8S_CLIENT_CERT + - CLIPPER_K8S_CLIENT_KEY + - CLIPPER_K8S_PASSWORD + - CLIPPER_TESTING_DOCKERHUB_PASSWORD 2. Jenkins will call `run_ci.sh`. It does three things: - It calls `build_docker_images.sh` to build all the docker images - Then it runs unittests docker container for python2 and python3 diff --git a/bin/build_docker_images.sh b/bin/build_docker_images.sh index 61c7dfe0d..f781fc94a 100755 --- a/bin/build_docker_images.sh +++ b/bin/build_docker_images.sh @@ -191,6 +191,9 @@ set_version_tag namespace=$(docker info | grep Username | awk '{ print $2 }') +# Clear clipper_docker_images.txt for future write +echo "" > clipper_docker_images.txt + # We build images with the SHA tag to try to prevent clobbering other images # being built from different branches on the same machine. This is particularly # useful for running these scripts on the Jenkins build cluster. @@ -214,13 +217,12 @@ create_image () { time docker build --build-arg CODE_VERSION=$sha_tag --build-arg REGISTRY=$namespace $rpc_version -t $namespace/$image:$sha_tag \ -f dockerfiles/$dockerfile $CLIPPER_ROOT - echo "Publishing $namespace/$image:$sha_tag from file $dockerfile" - docker push $namespace/$image:$sha_tag - - # We will NOT tag the image to version tag to prevent collision - # docker tag $namespace/$image:$sha_tag $namespace/$image:$version_tag + echo "Image tag appended to CLIPPER_ROOT/bin/clipper_docker_images.txt" + echo "$namespace/$image:$sha_tag" >> clipper_docker_images.txt if [ "$publish" = true ] && [ "$public" = true ] ; then + docker tag $namespace/$image:$sha_tag $namespace/$image:$version_tag + echo "Publishing $namespace/$image:$sha_tag" docker push $namespace/$image:$sha_tag echo "Publishing $namespace/$image:$version_tag" diff --git a/bin/run_ci.sh b/bin/run_ci.sh index faa5b6fa2..28f6dddea 100755 --- a/bin/run_ci.sh +++ b/bin/run_ci.sh @@ -21,7 +21,12 @@ tag=$( Date: Thu, 14 Jun 2018 16:27:53 -0700 Subject: [PATCH 47/63] fix clipper_docker_images location --- bin/build_docker_images.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/build_docker_images.sh b/bin/build_docker_images.sh index f781fc94a..872f5f05d 100755 --- a/bin/build_docker_images.sh +++ b/bin/build_docker_images.sh @@ -192,7 +192,7 @@ set_version_tag namespace=$(docker info | grep Username | awk '{ print $2 }') # Clear clipper_docker_images.txt for future write -echo "" > clipper_docker_images.txt +echo "" > $CLIPPER_ROOT/bin/clipper_docker_images.txt # We build images with the SHA tag to try to prevent clobbering other images # being built from different branches on the same machine. This is particularly @@ -218,7 +218,7 @@ create_image () { -f dockerfiles/$dockerfile $CLIPPER_ROOT echo "Image tag appended to CLIPPER_ROOT/bin/clipper_docker_images.txt" - echo "$namespace/$image:$sha_tag" >> clipper_docker_images.txt + echo "$namespace/$image:$sha_tag" >> $CLIPPER_ROOT/bin/clipper_docker_images.txt if [ "$publish" = true ] && [ "$public" = true ] ; then docker tag $namespace/$image:$sha_tag $namespace/$image:$version_tag From 25f3ddc52f6af1f2aef4d7f2fab90f0125c059fd Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Thu, 14 Jun 2018 16:49:44 -0700 Subject: [PATCH 48/63] Add set -x to debug "invalid reference format" --- bin/build_docker_images.sh | 2 +- bin/run_ci.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/build_docker_images.sh b/bin/build_docker_images.sh index 872f5f05d..8e87b3f5b 100755 --- a/bin/build_docker_images.sh +++ b/bin/build_docker_images.sh @@ -14,7 +14,7 @@ # pin their docker images to the minor version and get updates with new patches # automatically. - +set -x set -e set -u set -o pipefail diff --git a/bin/run_ci.sh b/bin/run_ci.sh index 28f6dddea..0fb2a3050 100755 --- a/bin/run_ci.sh +++ b/bin/run_ci.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash +set -x set -e set -u set -o pipefail From 9c5bb3bf7a51484fe692ce9ebf26ccc0d1b9bd17 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Thu, 14 Jun 2018 17:02:41 -0700 Subject: [PATCH 49/63] Add debug lines --- bin/run_ci.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/run_ci.sh b/bin/run_ci.sh index 0fb2a3050..a6008dfb2 100755 --- a/bin/run_ci.sh +++ b/bin/run_ci.sh @@ -22,6 +22,9 @@ tag=$( Date: Thu, 14 Jun 2018 17:11:25 -0700 Subject: [PATCH 50/63] Fix the blank line issue in clipper_docker_images.txt --- bin/build_docker_images.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/build_docker_images.sh b/bin/build_docker_images.sh index 8e87b3f5b..0bbf2d120 100755 --- a/bin/build_docker_images.sh +++ b/bin/build_docker_images.sh @@ -192,7 +192,8 @@ set_version_tag namespace=$(docker info | grep Username | awk '{ print $2 }') # Clear clipper_docker_images.txt for future write -echo "" > $CLIPPER_ROOT/bin/clipper_docker_images.txt +rm -f $CLIPPER_ROOT/bin/clipper_docker_images.txt +touch $CLIPPER_ROOT/bin/clipper_docker_images.txt # We build images with the SHA tag to try to prevent clobbering other images # being built from different branches on the same machine. This is particularly From db84d886884c333becbf0ec2d2a808f0bcaf26d0 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Thu, 14 Jun 2018 19:46:13 -0700 Subject: [PATCH 51/63] Don't pull ecr images --- bin/ci_checks.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bin/ci_checks.sh b/bin/ci_checks.sh index c3862c12d..c4777eadc 100755 --- a/bin/ci_checks.sh +++ b/bin/ci_checks.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash +set -x set -e set -u set -o pipefail @@ -26,10 +27,10 @@ tag=$( Date: Thu, 14 Jun 2018 21:15:11 -0700 Subject: [PATCH 52/63] Fix import --- .../clipper_admin/kubernetes/kubernetes_container_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index 89d19890c..70c2f8258 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -5,7 +5,7 @@ CLIPPER_INTERNAL_MANAGEMENT_PORT, CLIPPER_INTERNAL_QUERY_PORT, CLIPPER_INTERNAL_METRIC_PORT, CLIPPER_NAME_LABEL, ClusterAdapter) from ..exceptions import ClipperException -from .kubernetes_metric_utils import PROM_VERSION, CLIPPER_FRONTEND_EXPORTER_IMAGE +from .kubernetes_metric_utils import PROM_VERSION from contextlib import contextmanager from kubernetes import client, config From 1c3ba94cb776faa22498dbc25a4e968a96b18263 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Fri, 15 Jun 2018 14:55:07 -0700 Subject: [PATCH 53/63] Set git hash length to at least 10 to prevent collision Such that different version of git in different environement can agree that the length of the hash --- bin/build_docker_images.sh | 2 +- bin/run_ci.sh | 2 +- dockerfiles/ClipperPy35TestsDockerfile | 2 +- dockerfiles/ClipperTestsDockerfile | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/build_docker_images.sh b/bin/build_docker_images.sh index 0bbf2d120..f32a4c73d 100755 --- a/bin/build_docker_images.sh +++ b/bin/build_docker_images.sh @@ -31,7 +31,7 @@ cd $CLIPPER_ROOT # Initialize tags version_tag=$( VERSION.txt + && echo $(git rev-parse --verify --short=10 HEAD) > VERSION.txt RUN pip3 install -e /clipper/clipper_admin diff --git a/dockerfiles/ClipperTestsDockerfile b/dockerfiles/ClipperTestsDockerfile index ce3d01996..e131b6c98 100644 --- a/dockerfiles/ClipperTestsDockerfile +++ b/dockerfiles/ClipperTestsDockerfile @@ -14,7 +14,7 @@ RUN cd /clipper/src/libs/spdlog \ # Set version as git hash RUN cd /clipper \ - && echo $(git rev-parse --verify --short HEAD) > VERSION.txt + && echo $(git rev-parse --verify --short=10 HEAD) > VERSION.txt RUN pip install -e /clipper/clipper_admin From 533acbebbf6dcb56865e52e5015e285d3588c4b1 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Fri, 15 Jun 2018 16:16:08 -0700 Subject: [PATCH 54/63] Update registry in test --- .../R/rclipper_user/inst/build_container.py | 4 +- integration-tests/clipper_admin_tests.py | 56 +++++++++++-------- integration-tests/clipper_metric_kube.py | 4 +- .../kubernetes_integration_test.py | 4 +- .../kubernetes_multi_frontend.py | 4 +- integration-tests/many_apps_many_models.py | 4 +- 6 files changed, 43 insertions(+), 33 deletions(-) diff --git a/containers/R/rclipper_user/inst/build_container.py b/containers/R/rclipper_user/inst/build_container.py index c41b9cce6..c6396c637 100644 --- a/containers/R/rclipper_user/inst/build_container.py +++ b/containers/R/rclipper_user/inst/build_container.py @@ -5,8 +5,8 @@ from clipper_admin import DockerContainerManager, ClipperConnection, ClipperException from clipper_admin import version -CLIPPER_R_CONTAINER_BASE_IMAGE = "clipper/r-container-base:{}".format( - version.__version__) +CLIPPER_R_CONTAINER_BASE_IMAGE = "{}/r-container-base:{}".format( + version.__registry__, version.__version__) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Launch an R model container") diff --git a/integration-tests/clipper_admin_tests.py b/integration-tests/clipper_admin_tests.py index dc4be7646..9afd358cf 100644 --- a/integration-tests/clipper_admin_tests.py +++ b/integration-tests/clipper_admin_tests.py @@ -26,7 +26,7 @@ import clipper_admin as cl from clipper_admin.deployers.python import create_endpoint as create_py_endpoint from clipper_admin.deployers.python import deploy_python_closure -from clipper_admin import __version__ as clipper_version +from clipper_admin import __version__ as clipper_version, __registry__ as clipper_registry from clipper_admin.container_manager import CLIPPER_DOCKER_LABEL logging.basicConfig( @@ -191,7 +191,8 @@ def test_inspect_instance_returns_json_dict(self): def test_model_deploys_successfully(self): model_name = "m" version = "v1" - container_name = "clipper/noop-container:{}".format(clipper_version) + container_name = "{}/noop-container:{}".format(clipper_registry, + clipper_version) input_type = "doubles" self.clipper_conn.build_and_deploy_model( model_name, version, input_type, fake_model_data, container_name) @@ -208,7 +209,8 @@ def test_set_num_replicas_for_deployed_model_succeeds(self): model_name = "set-num-reps-model" input_type = "doubles" version = "v1" - container_name = "clipper/noop-container:{}".format(clipper_version) + container_name = "{}/noop-container:{}".format(clipper_registry, + clipper_version) input_type = "doubles" self.clipper_conn.build_and_deploy_model( model_name, version, input_type, fake_model_data, container_name) @@ -225,7 +227,8 @@ def test_set_num_replicas_for_deployed_model_succeeds(self): self.assertEqual(num_reps, 2) def test_remove_inactive_containers_succeeds(self): - container_name = "clipper/noop-container:{}".format(clipper_version) + container_name = "{}/noop-container:{}".format(clipper_registry, + clipper_version) input_type = "doubles" model_name = "remove-inactive-test-model" self.clipper_conn.build_and_deploy_model( @@ -260,7 +263,8 @@ def test_remove_inactive_containers_succeeds(self): self.assertEqual(len(containers), 3) def test_stop_models(self): - container_name = "clipper/noop-container:{}".format(clipper_version) + container_name = "{}/noop-container:{}".format(clipper_registry, + clipper_version) input_type = "doubles" mnames = ["jimmypage", "robertplant", "jpj", "johnbohnam"] versions = ["i", "ii", "iii", "iv"] @@ -323,23 +327,23 @@ def predict_func(inputs): containers = docker_client.containers.list( filters={ "ancestor": - "clipper/python-closure-container:{}".format( - clipper_version) + "{}/python-closure-container:{}".format( + clipper_registry, clipper_version) }) elif py_minor_version == (3, 5): containers = docker_client.containers.list( filters={ "ancestor": - "clipper/python35-closure-container:{}".format( - clipper_version) + "{}/python35-closure-container:{}".format( + clipper_registry, clipper_version) }) elif py_minor_version == (3, 6): containers = docker_client.containers.list( filters={ "ancestor": - "clipper/python36-closure-container:{}".format( - clipper_version) + "{}/python36-closure-container:{}".format( + clipper_registry, clipper_version) }) else: msg = ( @@ -378,8 +382,8 @@ def predict_func(inputs): containers = docker_client.containers.list( filters={ "ancestor": - "clipper/python-closure-container:{}".format( - clipper_version), + "{}/python-closure-container:{}".format( + clipper_registry, clipper_version), "label": "{key}={val}".format( key=CLIPPER_DOCKER_LABEL, @@ -390,8 +394,8 @@ def predict_func(inputs): containers = docker_client.containers.list( filters={ "ancestor": - "clipper/python35-closure-container:{}".format( - clipper_version), + "{}/python35-closure-container:{}".format( + clipper_registry, clipper_version), "label": "{key}={val}".format( key=CLIPPER_DOCKER_LABEL, @@ -401,8 +405,8 @@ def predict_func(inputs): containers = docker_client.containers.list( filters={ "ancestor": - "clipper/python36-closure-container:{}".format( - clipper_version), + "{}/python36-closure-container:{}".format( + clipper_registry, clipper_version), "label": "{key}={val}".format( key=CLIPPER_DOCKER_LABEL, @@ -561,21 +565,24 @@ def test_build_model_with_custom_packages(self): "buildmodeltest", "py2", fake_model_data, - "clipper/python-closure-container:{}".format(clipper_version), + "{}/python-closure-container:{}".format(clipper_registry, + clipper_version), None, pkgs_to_install=["sympy==1.1.*"]) self.clipper_conn.build_model( "buildmodeltest", "py35", fake_model_data, - "clipper/python35-closure-container:{}".format(clipper_version), + "{}/python35-closure-container:{}".format(clipper_registry, + clipper_version), None, pkgs_to_install=["sympy==1.1.*"]) self.clipper_conn.build_model( "buildmodeltest", "py36", fake_model_data, - "clipper/python35-closure-container:{}".format(clipper_version), + "{}/python35-closure-container:{}".format(clipper_registry, + clipper_version), None, pkgs_to_install=["sympy==1.1.*"]) @@ -643,7 +650,8 @@ def test_unlinked_app_returns_default_predictions(self): def test_deployed_model_queried_successfully(self): model_version = 1 - container_name = "clipper/noop-container:{}".format(clipper_version) + container_name = "{}/noop-container:{}".format(clipper_registry, + clipper_version) self.clipper_conn.build_and_deploy_model( self.model_name_2, model_version, self.input_type, fake_model_data, container_name) @@ -664,7 +672,8 @@ def test_deployed_model_queried_successfully(self): def test_batch_queries_returned_successfully(self): model_version = 1 - container_name = "clipper/noop-container:{}".format(clipper_version) + container_name = "{}/noop-container:{}".format(clipper_registry, + clipper_version) self.clipper_conn.build_and_deploy_model( self.model_name_3, model_version, self.input_type, fake_model_data, container_name) @@ -759,7 +768,8 @@ def predict_func(inputs): int(total_num_queries * .7)) def test_remove_inactive_container(self): - container_name = "clipper/noop-container:{}".format(clipper_version) + container_name = "{}/noop-container:{}".format(clipper_registry, + clipper_version) self.clipper_conn.build_and_deploy_model( self.model_name_5, 1, diff --git a/integration-tests/clipper_metric_kube.py b/integration-tests/clipper_metric_kube.py index c8b082294..819406231 100644 --- a/integration-tests/clipper_metric_kube.py +++ b/integration-tests/clipper_metric_kube.py @@ -17,7 +17,7 @@ fake_model_data, headers, log_clipper_state) cur_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.abspath("%s/../clipper_admin" % cur_dir)) -from clipper_admin import __version__ as clipper_version, CLIPPER_TEMP_DIR, ClipperException +from clipper_admin import __version__ as clipper_version, CLIPPER_TEMP_DIR, ClipperException, __registry__ as clipper_registry logging.basicConfig( format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', @@ -37,7 +37,7 @@ def deploy_model(clipper_conn, name, link=False): 1, "doubles", fake_model_data, - "clipper/noop-container:{}".format(clipper_version), + "{}/noop-container:{}".format(clipper_registry, clipper_version), num_replicas=2, # We set it to 2 for metric purpose. container_registry= "568959175238.dkr.ecr.us-west-1.amazonaws.com/clipper") diff --git a/integration-tests/kubernetes_integration_test.py b/integration-tests/kubernetes_integration_test.py index 76f180d20..e3b935b69 100644 --- a/integration-tests/kubernetes_integration_test.py +++ b/integration-tests/kubernetes_integration_test.py @@ -13,7 +13,7 @@ CLIPPER_CONTAINER_REGISTRY) cur_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.abspath("%s/../clipper_admin" % cur_dir)) -from clipper_admin import __version__ as clipper_version, CLIPPER_TEMP_DIR, ClipperException +from clipper_admin import __version__ as clipper_version, CLIPPER_TEMP_DIR, ClipperException, __registry__ as clipper_registry logging.basicConfig( format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', @@ -33,7 +33,7 @@ def deploy_model(clipper_conn, name, version, link=False): version, "doubles", fake_model_data, - "clipper/noop-container:{}".format(clipper_version), + "{}/noop-container:{}".format(clipper_registry, clipper_version), num_replicas=1, container_registry=CLIPPER_CONTAINER_REGISTRY) time.sleep(10) diff --git a/integration-tests/kubernetes_multi_frontend.py b/integration-tests/kubernetes_multi_frontend.py index f3b3100f4..8b427bd5e 100644 --- a/integration-tests/kubernetes_multi_frontend.py +++ b/integration-tests/kubernetes_multi_frontend.py @@ -19,7 +19,7 @@ cur_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.abspath("%s/../clipper_admin" % cur_dir)) -from clipper_admin import __version__ as clipper_version, CLIPPER_TEMP_DIR, ClipperException +from clipper_admin import __version__ as clipper_version, CLIPPER_TEMP_DIR, ClipperException, __registry__ as clipper_registry logging.basicConfig( format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', @@ -38,7 +38,7 @@ def deploy_model(clipper_conn, name, link=False): str(int(time.time())), # random string as version "doubles", fake_model_data, - "clipper/noop-container:{}".format(clipper_version), + "{}/noop-container:{}".format(clipper_registry, clipper_version), num_replicas=1, container_registry=CLIPPER_CONTAINER_REGISTRY) time.sleep(10) diff --git a/integration-tests/many_apps_many_models.py b/integration-tests/many_apps_many_models.py index 3c40672ae..6016ac8ad 100644 --- a/integration-tests/many_apps_many_models.py +++ b/integration-tests/many_apps_many_models.py @@ -12,7 +12,7 @@ fake_model_data, headers, log_clipper_state) cur_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.abspath("%s/../clipper_admin" % cur_dir)) -from clipper_admin import __version__ as clipper_version, CLIPPER_TEMP_DIR +from clipper_admin import __version__ as clipper_version, CLIPPER_TEMP_DIR, __registry__ as clipper_registry logging.basicConfig( format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', @@ -30,7 +30,7 @@ def deploy_model(clipper_conn, name, version, link=False): version, "doubles", fake_model_data, - "clipper/noop-container:{}".format(clipper_version), + "{}/noop-container:{}".format(clipper_registry, clipper_version), num_replicas=1) time.sleep(10) From 855a3074f8dc99c6535bf28fb42d0a48e147a405 Mon Sep 17 00:00:00 2001 From: simon-mo Date: Mon, 18 Jun 2018 20:37:22 +0000 Subject: [PATCH 55/63] Add version back to metrics --- clipper_admin/clipper_admin/metrics/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/clipper_admin/clipper_admin/metrics/__init__.py b/clipper_admin/clipper_admin/metrics/__init__.py index 94a7ec8de..dda64f1a3 100644 --- a/clipper_admin/clipper_admin/metrics/__init__.py +++ b/clipper_admin/clipper_admin/metrics/__init__.py @@ -1,4 +1,5 @@ from __future__ import absolute_import +from ..version import __version__ from .client import add_metric, report_metric from . import server From 99e25172b8520872a2dea6f526b083d7c7fda7a3 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Mon, 18 Jun 2018 15:22:57 -0700 Subject: [PATCH 56/63] Remove CLIPPER_CONTAINER_REGISTRY constant in test --- .../kubernetes_integration_test.py | 6 ++---- .../kubernetes_multi_frontend.py | 6 ++---- integration-tests/kubernetes_namespace.py | 10 +++------- integration-tests/multi_tenancy_test.py | 20 ++++++------------- 4 files changed, 13 insertions(+), 29 deletions(-) diff --git a/integration-tests/kubernetes_integration_test.py b/integration-tests/kubernetes_integration_test.py index e3b935b69..14d536445 100644 --- a/integration-tests/kubernetes_integration_test.py +++ b/integration-tests/kubernetes_integration_test.py @@ -9,8 +9,7 @@ import time import logging from test_utils import (create_kubernetes_connection, BenchmarkException, - fake_model_data, headers, log_clipper_state, - CLIPPER_CONTAINER_REGISTRY) + fake_model_data, headers, log_clipper_state) cur_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.abspath("%s/../clipper_admin" % cur_dir)) from clipper_admin import __version__ as clipper_version, CLIPPER_TEMP_DIR, ClipperException, __registry__ as clipper_registry @@ -34,8 +33,7 @@ def deploy_model(clipper_conn, name, version, link=False): "doubles", fake_model_data, "{}/noop-container:{}".format(clipper_registry, clipper_version), - num_replicas=1, - container_registry=CLIPPER_CONTAINER_REGISTRY) + num_replicas=1) time.sleep(10) if link: diff --git a/integration-tests/kubernetes_multi_frontend.py b/integration-tests/kubernetes_multi_frontend.py index 8b427bd5e..d7c38712b 100644 --- a/integration-tests/kubernetes_multi_frontend.py +++ b/integration-tests/kubernetes_multi_frontend.py @@ -14,8 +14,7 @@ import logging import yaml from test_utils import (create_kubernetes_connection, BenchmarkException, - fake_model_data, headers, log_clipper_state, - CLIPPER_CONTAINER_REGISTRY) + fake_model_data, headers, log_clipper_state) cur_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.abspath("%s/../clipper_admin" % cur_dir)) @@ -39,8 +38,7 @@ def deploy_model(clipper_conn, name, link=False): "doubles", fake_model_data, "{}/noop-container:{}".format(clipper_registry, clipper_version), - num_replicas=1, - container_registry=CLIPPER_CONTAINER_REGISTRY) + num_replicas=1) time.sleep(10) if link: diff --git a/integration-tests/kubernetes_namespace.py b/integration-tests/kubernetes_namespace.py index 0a34d9668..b9f27af05 100644 --- a/integration-tests/kubernetes_namespace.py +++ b/integration-tests/kubernetes_namespace.py @@ -12,7 +12,7 @@ from datetime import datetime import os import time -from test_utils import create_kubernetes_connection, create_docker_connection, CLIPPER_CONTAINER_REGISTRY +from test_utils import create_kubernetes_connection, create_docker_connection def test(): @@ -38,12 +38,8 @@ def feature_sum(xs): def deploy_(clipper_conn): - python_deployer.create_endpoint( - clipper_conn, - "testapp0-model", - "doubles", - feature_sum, - registry=CLIPPER_CONTAINER_REGISTRY) + python_deployer.create_endpoint(clipper_conn, "testapp0-model", "doubles", + feature_sum) def predict_(addr, x, batch=False): diff --git a/integration-tests/multi_tenancy_test.py b/integration-tests/multi_tenancy_test.py index 763ed1527..93292686c 100644 --- a/integration-tests/multi_tenancy_test.py +++ b/integration-tests/multi_tenancy_test.py @@ -8,15 +8,15 @@ from datetime import datetime import os import time -from test_utils import create_kubernetes_connection, create_docker_connection, CLIPPER_CONTAINER_REGISTRY +from test_utils import create_kubernetes_connection, create_docker_connection def test(kubernetes): conn_1 = create('multi-tenancy-1', use_kubernetes=kubernetes) conn_2 = create('multi-tenancy-2', use_kubernetes=kubernetes) - deploy_(conn_1, use_kubernetes=kubernetes) - deploy_(conn_2, use_kubernetes=kubernetes) + deploy_(conn_1) + deploy_(conn_2) res_1 = predict_(conn_1.get_query_addr(), [.1, .2, .3]) res_2 = predict_(conn_2.get_query_addr(), [.1, .2, .3]) @@ -41,17 +41,9 @@ def feature_sum(xs): return [str(sum(x)) for x in xs] -def deploy_(clipper_conn, use_kubernetes=False): - if use_kubernetes: - python_deployer.create_endpoint( - clipper_conn, - "testapp0-model", - "doubles", - feature_sum, - registry=CLIPPER_CONTAINER_REGISTRY) - else: - python_deployer.create_endpoint(clipper_conn, "testapp0-model", - "doubles", feature_sum) +def deploy_(clipper_conn): + python_deployer.create_endpoint(clipper_conn, "testapp0-model", "doubles", + feature_sum) def predict_(addr, x, batch=False): From d18d50993c39768c32839f52a120f98a8419fac0 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Mon, 18 Jun 2018 16:24:57 -0700 Subject: [PATCH 57/63] Revert "Remove CLIPPER_CONTAINER_REGISTRY constant in test" This reverts commit 99e25172b8520872a2dea6f526b083d7c7fda7a3. --- .../kubernetes_integration_test.py | 6 ++++-- .../kubernetes_multi_frontend.py | 6 ++++-- integration-tests/kubernetes_namespace.py | 10 +++++++--- integration-tests/multi_tenancy_test.py | 20 +++++++++++++------ 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/integration-tests/kubernetes_integration_test.py b/integration-tests/kubernetes_integration_test.py index 14d536445..e3b935b69 100644 --- a/integration-tests/kubernetes_integration_test.py +++ b/integration-tests/kubernetes_integration_test.py @@ -9,7 +9,8 @@ import time import logging from test_utils import (create_kubernetes_connection, BenchmarkException, - fake_model_data, headers, log_clipper_state) + fake_model_data, headers, log_clipper_state, + CLIPPER_CONTAINER_REGISTRY) cur_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.abspath("%s/../clipper_admin" % cur_dir)) from clipper_admin import __version__ as clipper_version, CLIPPER_TEMP_DIR, ClipperException, __registry__ as clipper_registry @@ -33,7 +34,8 @@ def deploy_model(clipper_conn, name, version, link=False): "doubles", fake_model_data, "{}/noop-container:{}".format(clipper_registry, clipper_version), - num_replicas=1) + num_replicas=1, + container_registry=CLIPPER_CONTAINER_REGISTRY) time.sleep(10) if link: diff --git a/integration-tests/kubernetes_multi_frontend.py b/integration-tests/kubernetes_multi_frontend.py index d7c38712b..8b427bd5e 100644 --- a/integration-tests/kubernetes_multi_frontend.py +++ b/integration-tests/kubernetes_multi_frontend.py @@ -14,7 +14,8 @@ import logging import yaml from test_utils import (create_kubernetes_connection, BenchmarkException, - fake_model_data, headers, log_clipper_state) + fake_model_data, headers, log_clipper_state, + CLIPPER_CONTAINER_REGISTRY) cur_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.abspath("%s/../clipper_admin" % cur_dir)) @@ -38,7 +39,8 @@ def deploy_model(clipper_conn, name, link=False): "doubles", fake_model_data, "{}/noop-container:{}".format(clipper_registry, clipper_version), - num_replicas=1) + num_replicas=1, + container_registry=CLIPPER_CONTAINER_REGISTRY) time.sleep(10) if link: diff --git a/integration-tests/kubernetes_namespace.py b/integration-tests/kubernetes_namespace.py index b9f27af05..0a34d9668 100644 --- a/integration-tests/kubernetes_namespace.py +++ b/integration-tests/kubernetes_namespace.py @@ -12,7 +12,7 @@ from datetime import datetime import os import time -from test_utils import create_kubernetes_connection, create_docker_connection +from test_utils import create_kubernetes_connection, create_docker_connection, CLIPPER_CONTAINER_REGISTRY def test(): @@ -38,8 +38,12 @@ def feature_sum(xs): def deploy_(clipper_conn): - python_deployer.create_endpoint(clipper_conn, "testapp0-model", "doubles", - feature_sum) + python_deployer.create_endpoint( + clipper_conn, + "testapp0-model", + "doubles", + feature_sum, + registry=CLIPPER_CONTAINER_REGISTRY) def predict_(addr, x, batch=False): diff --git a/integration-tests/multi_tenancy_test.py b/integration-tests/multi_tenancy_test.py index 93292686c..763ed1527 100644 --- a/integration-tests/multi_tenancy_test.py +++ b/integration-tests/multi_tenancy_test.py @@ -8,15 +8,15 @@ from datetime import datetime import os import time -from test_utils import create_kubernetes_connection, create_docker_connection +from test_utils import create_kubernetes_connection, create_docker_connection, CLIPPER_CONTAINER_REGISTRY def test(kubernetes): conn_1 = create('multi-tenancy-1', use_kubernetes=kubernetes) conn_2 = create('multi-tenancy-2', use_kubernetes=kubernetes) - deploy_(conn_1) - deploy_(conn_2) + deploy_(conn_1, use_kubernetes=kubernetes) + deploy_(conn_2, use_kubernetes=kubernetes) res_1 = predict_(conn_1.get_query_addr(), [.1, .2, .3]) res_2 = predict_(conn_2.get_query_addr(), [.1, .2, .3]) @@ -41,9 +41,17 @@ def feature_sum(xs): return [str(sum(x)) for x in xs] -def deploy_(clipper_conn): - python_deployer.create_endpoint(clipper_conn, "testapp0-model", "doubles", - feature_sum) +def deploy_(clipper_conn, use_kubernetes=False): + if use_kubernetes: + python_deployer.create_endpoint( + clipper_conn, + "testapp0-model", + "doubles", + feature_sum, + registry=CLIPPER_CONTAINER_REGISTRY) + else: + python_deployer.create_endpoint(clipper_conn, "testapp0-model", + "doubles", feature_sum) def predict_(addr, x, batch=False): From 8a184ac8ffc69a9160b82a87b9d374a6b869e69c Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Mon, 18 Jun 2018 16:25:51 -0700 Subject: [PATCH 58/63] Add CLIPPER_CONTAINER_REGISTRY back --- integration-tests/test_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/integration-tests/test_utils.py b/integration-tests/test_utils.py index fe89e80fb..17239625b 100644 --- a/integration-tests/test_utils.py +++ b/integration-tests/test_utils.py @@ -35,6 +35,8 @@ def __str__(self): # range of ports where available ports can be found PORT_RANGE = [34256, 50000] +# The dockerhub account we are pushing kubernetes images to +CLIPPER_CONTAINER_REGISTRY = 'clippertesting' def get_docker_client(): if "DOCKER_API_VERSION" in os.environ: From 1b70dd03e15fcdb1f8df6e9063b1d4f8e2718bd1 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 19 Jun 2018 00:50:19 -0700 Subject: [PATCH 59/63] Format code --- integration-tests/test_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/integration-tests/test_utils.py b/integration-tests/test_utils.py index 17239625b..91bf64a59 100644 --- a/integration-tests/test_utils.py +++ b/integration-tests/test_utils.py @@ -38,6 +38,7 @@ def __str__(self): # The dockerhub account we are pushing kubernetes images to CLIPPER_CONTAINER_REGISTRY = 'clippertesting' + def get_docker_client(): if "DOCKER_API_VERSION" in os.environ: return docker.from_env(version=os.environ["DOCKER_API_VERSION"]) From 955508637226fdd5f9047a2da615afb815a2caf1 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 19 Jun 2018 13:40:51 -0700 Subject: [PATCH 60/63] Fix docker login in unnitest --- bin/ci_checks.sh | 3 +++ bin/run_ci.sh | 2 ++ integration-tests/test_utils.py | 6 ++++++ 3 files changed, 11 insertions(+) diff --git a/bin/ci_checks.sh b/bin/ci_checks.sh index c4777eadc..b20a8c602 100755 --- a/bin/ci_checks.sh +++ b/bin/ci_checks.sh @@ -52,6 +52,9 @@ kubectl get nodes # Set kubectl proxy for k8s tests later kubectl proxy --port 8080 & +# Login to clippertesting dockerhub here +docker login --username="clippertesting" --password=$CLIPPER_TESTING_DOCKERHUB_PASSWORD + if [[ $run_all = "true" ]]; then $DIR/check_format.sh $DIR/run_unittests.sh diff --git a/bin/run_ci.sh b/bin/run_ci.sh index ad3528e8f..3b5c30e01 100755 --- a/bin/run_ci.sh +++ b/bin/run_ci.sh @@ -41,6 +41,7 @@ docker run --rm --network=host -v /var/run/docker.sock:/var/run/docker.sock -v / -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ -e CLIPPER_REGISTRY=$CLIPPER_REGISTRY \ + -e CLIPPER_TESTING_DOCKERHUB_PASSWORD=$CLIPPER_TESTING_DOCKERHUB_PASSWORD \ $CLIPPER_REGISTRY/unittests:$sha_tag # Python 3 unittests @@ -52,4 +53,5 @@ docker run --rm --network=host -v /var/run/docker.sock:/var/run/docker.sock -v / -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ -e CLIPPER_REGISTRY=$CLIPPER_REGISTRY \ + -e CLIPPER_TESTING_DOCKERHUB_PASSWORD=$CLIPPER_TESTING_DOCKERHUB_PASSWORD \ $CLIPPER_REGISTRY/py35tests:$sha_tag diff --git a/integration-tests/test_utils.py b/integration-tests/test_utils.py index 91bf64a59..56a43d79b 100644 --- a/integration-tests/test_utils.py +++ b/integration-tests/test_utils.py @@ -38,6 +38,9 @@ def __str__(self): # The dockerhub account we are pushing kubernetes images to CLIPPER_CONTAINER_REGISTRY = 'clippertesting' +# USE_MINIKUBE == True -> useInternalIP = True +USE_MINIKUBE = False + def get_docker_client(): if "DOCKER_API_VERSION" in os.environ: @@ -138,6 +141,7 @@ def create_kubernetes_connection(cleanup=False, logger.info("Cleaning up Kubernetes Cluster {}".format(cleanup_name)) cm = KubernetesContainerManager( cluster_name=cleanup_name, + useInternalIP=USE_MINIKUBE, kubernetes_proxy_addr=kubernetes_proxy_addr) cl = ClipperConnection(cm) cl.stop_all() @@ -149,6 +153,7 @@ def create_kubernetes_connection(cleanup=False, cluster_name=new_name, kubernetes_proxy_addr=kubernetes_proxy_addr, namespace=namespace, + useInternalIP=USE_MINIKUBE, create_namespace_if_not_exists=True) cl = ClipperConnection(cm) cl.start_clipper(num_frontend_replicas=num_frontend_replicas) @@ -157,6 +162,7 @@ def create_kubernetes_connection(cleanup=False, try: cm = KubernetesContainerManager( cluster_name=connect_name, + useInternalIP=USE_MINIKUBE, kubernetes_proxy_addr=kubernetes_proxy_addr) cl = ClipperConnection(cm) cl.connect() From e5c285259aa306539817bb24ec9a02a3053281ae Mon Sep 17 00:00:00 2001 From: simon-mo Date: Wed, 20 Jun 2018 05:38:40 +0000 Subject: [PATCH 61/63] Fix image name in R test --- integration-tests/r_integration_test/deploy_query_test_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-tests/r_integration_test/deploy_query_test_model.py b/integration-tests/r_integration_test/deploy_query_test_model.py index 1021683f6..b8c4ac5ef 100644 --- a/integration-tests/r_integration_test/deploy_query_test_model.py +++ b/integration-tests/r_integration_test/deploy_query_test_model.py @@ -32,7 +32,7 @@ MODEL_NAME = "rtest-model" MODEL_VERSION = 1 -MODEL_IMAGE_NAME = "rtest-model:1" +MODEL_IMAGE_NAME = "default-cluster-rtest-model:1" def create_application(clipper_conn): From cd8d9d11e036363555a5689937528c90cde7343e Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Wed, 20 Jun 2018 10:50:12 -0700 Subject: [PATCH 62/63] Fix CLIPPER_CONTAINER_REGISTRY in k8s metrics test --- integration-tests/clipper_metric_kube.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/integration-tests/clipper_metric_kube.py b/integration-tests/clipper_metric_kube.py index 819406231..641c4fc1f 100644 --- a/integration-tests/clipper_metric_kube.py +++ b/integration-tests/clipper_metric_kube.py @@ -14,7 +14,8 @@ import logging import yaml from test_utils import (create_kubernetes_connection, BenchmarkException, - fake_model_data, headers, log_clipper_state) + fake_model_data, headers, log_clipper_state, + CLIPPER_CONTAINER_REGISTRY) cur_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.abspath("%s/../clipper_admin" % cur_dir)) from clipper_admin import __version__ as clipper_version, CLIPPER_TEMP_DIR, ClipperException, __registry__ as clipper_registry @@ -39,8 +40,7 @@ def deploy_model(clipper_conn, name, link=False): fake_model_data, "{}/noop-container:{}".format(clipper_registry, clipper_version), num_replicas=2, # We set it to 2 for metric purpose. - container_registry= - "568959175238.dkr.ecr.us-west-1.amazonaws.com/clipper") + container_registry=CLIPPER_CONTAINER_REGISTRY) time.sleep(10) if link: From 0f58770a7adafbabeeeacad0da25c860a7e16ccf Mon Sep 17 00:00:00 2001 From: simon-mo Date: Thu, 21 Jun 2018 00:33:46 +0000 Subject: [PATCH 63/63] Refresh k8s metric tests;fix variable reuse --- integration-tests/clipper_metric_kube.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/integration-tests/clipper_metric_kube.py b/integration-tests/clipper_metric_kube.py index 641c4fc1f..cd582c4f4 100644 --- a/integration-tests/clipper_metric_kube.py +++ b/integration-tests/clipper_metric_kube.py @@ -186,10 +186,10 @@ def check_target_health(metric_addr): retry_count = MAX_RETRY while retry_count: try: - name = prefix + name + metric_key = prefix + name if spec['type'] == 'Histogram' or spec['type'] == 'Summary': - name += '_sum' - res = get_matched_query(metric_api_addr, name) + metric_key += '_sum' + res = get_matched_query(metric_api_addr, metric_key) parse_res_and_assert_node(res, node_num=2) retry_count = 0 except AssertionError as e: @@ -203,6 +203,9 @@ def check_target_health(metric_addr): time.sleep(10) pass # try again. + # One metric is there means all metric there + break + logger.info("Test 2 Passed") # End Metric Check if not os.path.exists(CLIPPER_TEMP_DIR):