diff --git a/doc/source/python/python_component.md b/doc/source/python/python_component.md index 2358ecd03d..5a023ce1ea 100644 --- a/doc/source/python/python_component.md +++ b/doc/source/python/python_component.md @@ -144,8 +144,81 @@ class ModelWithMetrics(object): return {"system":"production"} ``` +## REST Health Endpoint +If you wish to add a REST health endpoint, you can implement the `health_status` method with signature as shown below: +```python + def health_status(self) -> Union[np.ndarray, List, str, bytes]: +``` + +You can use this to verify that your service can respond to HTTP calls after you have built your Docker image and also +as Kubernetes liveness and readiness probes to verify that your model is healthy. + +A simple example is shown below: + +```python +class ModelWithHealthEndpoint(object): + def predict(self, X, features_names): + return X + def health_status(self): + response = self.predict([1, 2], ["f1", "f2"]) + assert len(response) == 2, "health check returning bad predictions" # or some other simple validation + return response +``` +When you use `seldon-core-microservice` to start the HTTP server, you can verify that the model is up and running by +checking the `/health/status` endpoint: +``` +$ curl localhost:5000/health/status +{"data":{"names":[],"tensor":{"shape":[2],"values":[1,2]}},"meta":{}} +``` + +Additionally, you can use the `/health/ping` endpoint if you want a lightweight call that just checks that +the HTTP server is up: + +``` +$ curl localhost:5000/health/ping +pong +``` + +You can also override the default liveness and readiness probes and use HTTP health endpoints by adding them in your +`SeldonDeployment` YAML. You can modify the parameters for the probes to suit your reliability needs without putting +too much stress on the container. Read more about these probes in the +[Kubernetes documentation](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/). 
+An example is shown below: + +```yaml +apiVersion: machinelearning.seldon.io/v1alpha2 +kind: SeldonDeployment +spec: + name: my-app + predictors: + - componentSpecs: + - spec: + containers: + - image: my-app-image:version + name: classifier + livenessProbe: + failureThreshold: 3 + initialDelaySeconds: 60 + periodSeconds: 5 + successThreshold: 1 + httpGet: + path: /health/status + port: http + scheme: HTTP + timeoutSeconds: 1 + readinessProbe: + failureThreshold: 3 + initialDelaySeconds: 20 + periodSeconds: 5 + successThreshold: 1 + httpGet: + path: /health/status + port: http + scheme: HTTP + timeoutSeconds: 1 +``` ## Low level Methods If you want more control you can provide a low-level methods that will provide as input the raw proto buffer payloads. The signatures for these are shown below for release `sedon_core>=0.2.6.1`: @@ -162,6 +235,8 @@ If you want more control you can provide a low-level methods that will provide a def route_raw(self, msg: prediction_pb2.SeldonMessage) -> prediction_pb2.SeldonMessage: def aggregate_raw(self, msgs: prediction_pb2.SeldonMessageList) -> prediction_pb2.SeldonMessage: + + def health_status_raw(self) -> prediction_pb2.SeldonMessage: ``` ## User Defined Exceptions diff --git a/python/seldon_core/seldon_methods.py b/python/seldon_core/seldon_methods.py index 44f0071afb..259ec44c4a 100644 --- a/python/seldon_core/seldon_methods.py +++ b/python/seldon_core/seldon_methods.py @@ -14,6 +14,7 @@ client_transform_output, client_transform_input, client_send_feedback, + client_health_status, SeldonNotImplementedError, ) from seldon_core.flask_utils import SeldonMicroserviceException @@ -342,3 +343,26 @@ def aggregate( return construct_response_json( user_model, False, request["seldonMessages"][0], client_response ) + + +def health_status(user_model: Any) -> Union[prediction_pb2.SeldonMessage, List, Dict]: + """ + Call the user model to check the health of the model + + Parameters + ---------- + user_model + User defined class instance + 
Returns + ------- + Health check output + """ + + if hasattr(user_model, "health_status_raw"): + try: + return user_model.health_status_raw() + except SeldonNotImplementedError: + pass + + client_response = client_health_status(user_model) + return construct_response_json(user_model, False, {}, client_response) diff --git a/python/seldon_core/user_model.py b/python/seldon_core/user_model.py index b7c9a5e74b..40713f9b59 100644 --- a/python/seldon_core/user_model.py +++ b/python/seldon_core/user_model.py @@ -103,6 +103,12 @@ def aggregate_raw( ) -> prediction_pb2.SeldonMessage: raise SeldonNotImplementedError("aggregate_raw is not implemented") + def health_status(self) -> Union[np.ndarray, List, str, bytes]: + raise SeldonNotImplementedError("health is not implemented") + + def health_status_raw(self) -> prediction_pb2.SeldonMessage: + raise SeldonNotImplementedError("health_raw is not implemented") + def client_custom_tags(user_model: SeldonComponent) -> Dict: """ @@ -417,3 +423,23 @@ def client_aggregate( return user_model.aggregate(features_list, feature_names_list) else: raise SeldonNotImplementedError("Aggregate not defined") + + +def client_health_status( + user_model: SeldonComponent, +) -> Union[np.ndarray, List, str, bytes]: + """ + Perform a health check + + Parameters + ---------- + user_model + A Seldon user model + Returns + ------- + Health check results + """ + if hasattr(user_model, "health_status"): + return user_model.health_status() + else: + raise SeldonNotImplementedError("health_status not defined") diff --git a/python/seldon_core/version.py b/python/seldon_core/version.py index edd1960781..d9a5ca5561 100644 --- a/python/seldon_core/version.py +++ b/python/seldon_core/version.py @@ -1 +1 @@ -__version__ = '0.5.0.3' +__version__ = "0.5.0.3" diff --git a/python/seldon_core/wrapper.py b/python/seldon_core/wrapper.py index cbb1e2f3e5..70db81a380 100644 --- a/python/seldon_core/wrapper.py +++ b/python/seldon_core/wrapper.py @@ -97,6 +97,20 @@ def 
Aggregate(): logger.debug("REST Response: %s", response) return jsonify(response) + @app.route("/health/ping", methods=["GET"]) + def HealthPing(): + """ + Lightweight endpoint to check the liveness of the REST endpoint + """ + return "pong" + + @app.route("/health/status", methods=["GET"]) + def HealthStatus(): + logger.debug("REST Health Status Request") + response = seldon_core.seldon_methods.health_status(user_model) + logger.debug("REST Health Status Response: %s", response) + return jsonify(response) + return app diff --git a/python/tests/test_model_microservice.py b/python/tests/test_model_microservice.py index 33c9e990d4..dcd31101cf 100644 --- a/python/tests/test_model_microservice.py +++ b/python/tests/test_model_microservice.py @@ -18,6 +18,8 @@ from tensorflow.core.framework.tensor_pb2 import TensorProto import tensorflow as tf +HEALTH_PING_URL = "/health/ping" +HEALTH_STATUS_URL = "/health/status" """ Checksum of bytes. Used to check data integrity of binData passed in multipart/form-data request @@ -38,6 +40,8 @@ def rs232_checksum(the_bytes): class UserObject(SeldonComponent): + HEALTH_STATUS_REPONSE = [0.123] + def __init__(self, metrics_ok=True, ret_nparray=False, ret_meta=False): self.metrics_ok = metrics_ok self.ret_nparray = ret_nparray @@ -77,8 +81,13 @@ def metrics(self): else: return [{"type": "BAD", "key": "mycounter", "value": 1}] + def health_status(self): + return self.predict(self.HEALTH_STATUS_REPONSE, ["some_float"]) + class UserObjectLowLevel(SeldonComponent): + HEALTH_STATUS_RAW_RESPONSE = [123.456, 7.89] + def __init__(self, metrics_ok=True, ret_nparray=False): self.metrics_ok = metrics_ok self.ret_nparray = ret_nparray @@ -101,6 +110,9 @@ def send_feedback_rest(self, request): def send_feedback_grpc(self, request): print("Feedback called") + def health_status_raw(self): + return {"data": {"ndarray": self.HEALTH_STATUS_RAW_RESPONSE}} + class UserObjectLowLevelWithStatusInResponse(SeldonComponent): def __init__(self, metrics_ok=True, 
ret_nparray=False): @@ -539,6 +551,36 @@ def test_model_seldon_json_ok(): assert rv.status_code == 200 +def test_model_health_ping(): + user_object = UserObject() + app = get_rest_microservice(user_object) + client = app.test_client() + rv = client.get(HEALTH_PING_URL) + assert rv.status_code == 200 + assert rv.data == b"pong" + + +def test_model_health_status(): + user_object = UserObject() + app = get_rest_microservice(user_object) + client = app.test_client() + rv = client.get(HEALTH_STATUS_URL) + assert rv.status_code == 200 + j = json.loads(rv.data) + print(j) + assert j["data"]["tensor"]["values"] == UserObject.HEALTH_STATUS_REPONSE + + +def test_model_health_status_raw(): + user_object = UserObjectLowLevel() + app = get_rest_microservice(user_object) + client = app.test_client() + rv = client.get(HEALTH_STATUS_URL) + assert rv.status_code == 200 + j = json.loads(rv.data) + assert j["data"]["ndarray"] == UserObjectLowLevel.HEALTH_STATUS_RAW_RESPONSE + + def test_proto_ok(): user_object = UserObject() app = SeldonModelGRPC(user_object)