Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Throw errors on model deletion failures #834

Merged
merged 8 commits into from
Apr 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Features
### Enhancements
### Bug Fixes
* Throw errors on model deletion failures ([#834](https://github.com/opensearch-project/k-NN/pull/834))
### Infrastructure
* Adding filter type to filtering release configs ([#792](https://github.com/opensearch-project/k-NN/pull/792))
* Add CHANGELOG ([#800](https://github.com/opensearch-project/k-NN/pull/800))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.opensearch.action.search.SearchResponse;
import org.opensearch.client.Request;
import org.opensearch.client.Response;
import org.opensearch.client.ResponseException;
import org.opensearch.common.Strings;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.common.xcontent.XContentFactory;
Expand All @@ -20,14 +21,12 @@
import org.opensearch.knn.indices.ModelMetadata;
import org.opensearch.knn.indices.ModelState;
import org.opensearch.knn.plugin.KNNPlugin;
import org.opensearch.knn.plugin.transport.DeleteModelResponse;
import org.opensearch.rest.RestStatus;
import org.opensearch.search.SearchHit;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Locale;
import java.util.Map;

import static org.opensearch.knn.TestUtils.KNN_BWC_PREFIX;
Expand All @@ -44,7 +43,6 @@
import static org.opensearch.knn.common.KNNConstants.PARAMETERS;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_NLIST;
import static org.opensearch.knn.common.KNNConstants.NMSLIB_NAME;
import static org.opensearch.knn.common.KNNConstants.MODEL_INDEX_NAME;

public class ModelIT extends AbstractRestartUpgradeTestCase {
private static final String TEST_MODEL_INDEX = KNN_BWC_PREFIX + "test-model-index";
Expand Down Expand Up @@ -153,25 +151,8 @@ public void testDeleteTrainingModel() throws Exception {
String restURI = String.join("/", KNNPlugin.KNN_BASE_URI, MODELS, TEST_MODEL_ID_TRAINING);
Request request = new Request("DELETE", restURI);

Response response = client().performRequest(request);
assertEquals(request.getEndpoint() + ": failed", RestStatus.OK, RestStatus.fromCode(response.getStatusLine().getStatusCode()));

assertEquals(3, getDocCount(MODEL_INDEX_NAME));

String responseBody = EntityUtils.toString(response.getEntity());
assertNotNull(responseBody);

Map<String, Object> responseMap = createParser(XContentType.JSON.xContent(), responseBody).map();

assertEquals(TEST_MODEL_ID_TRAINING, responseMap.get(MODEL_ID));
assertEquals("failed", responseMap.get(DeleteModelResponse.RESULT));

String errorMessage = String.format(
Locale.ROOT,
"Cannot delete model \"%s\". Model is still in " + "training",
TEST_MODEL_ID_TRAINING
);
assertEquals(errorMessage, responseMap.get(DeleteModelResponse.ERROR_MSG));
ResponseException ex = expectThrows(ResponseException.class, () -> client().performRequest(request));
assertEquals(RestStatus.CONFLICT.getStatus(), ex.getResponse().getStatusLine().getStatusCode());
}
}

Expand All @@ -181,7 +162,6 @@ public static void wipeAllModels() throws IOException {
if (!isRunningAgainstOldCluster()) {
deleteKNNModel(TEST_MODEL_ID);
deleteKNNModel(TEST_MODEL_ID_DEFAULT);
deleteKNNModel(TEST_MODEL_ID_TRAINING);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.common.exception;

import org.opensearch.OpenSearchException;
import org.opensearch.common.logging.LoggerMessageFormat;
import org.opensearch.rest.RestStatus;

/**
* Exception thrown when a model is deleted while it is in the training state. The RestStatus associated with this
* exception should be a {@link RestStatus#CONFLICT} because the request cannot be deleted due to the model being in
* the training state.
*/
public class DeleteModelWhenInTrainStateException extends OpenSearchException {
/**
* Constructor
*
* @param msg detailed exception message
* @param args arguments of the message
*/
public DeleteModelWhenInTrainStateException(String msg, Object... args) {
super(LoggerMessageFormat.format(msg, args));
}

@Override
public RestStatus status() {
return RestStatus.CONFLICT;
}
}
20 changes: 9 additions & 11 deletions src/main/java/org/opensearch/knn/indices/ModelDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import org.opensearch.index.IndexNotFoundException;
import org.opensearch.knn.common.KNNConstants;
import org.opensearch.knn.common.ThreadContextHelper;
import org.opensearch.knn.common.exception.DeleteModelWhenInTrainStateException;
import org.opensearch.knn.plugin.transport.DeleteModelResponse;
import org.opensearch.knn.plugin.transport.GetModelResponse;
import org.opensearch.knn.plugin.transport.RemoveModelFromCacheAction;
Expand Down Expand Up @@ -174,8 +175,6 @@ public interface ModelDao {
final class OpenSearchKNNModelDao implements ModelDao {

public static Logger logger = LogManager.getLogger(ModelDao.class);
private static final String DELETED = "deleted";
private static final String FAILED = "failed";

private int numberOfShards;
private int numberOfReplicas;
Expand Down Expand Up @@ -487,9 +486,8 @@ public boolean isModelInGraveyard(String modelId) {
public void delete(String modelId, ActionListener<DeleteModelResponse> listener) {
// If the index is not created, there is no need to delete the model
if (!isCreated()) {
logger.error("Cannot delete model \"" + modelId + "\". Model index " + MODEL_INDEX_NAME + "does not exist.");
String errorMessage = String.format("Cannot delete model \"%s\". Model index does not exist", modelId);
listener.onResponse(new DeleteModelResponse(modelId, FAILED, errorMessage));
String errorMessage = String.format("Cannot delete model [%s]. Model index [%s] does not exist", modelId, MODEL_INDEX_NAME);
listener.onFailure(new ResourceNotFoundException(errorMessage));
return;
}

Expand All @@ -503,7 +501,7 @@ public void delete(String modelId, ActionListener<DeleteModelResponse> listener)
// Get Model to check if model is in TRAINING
get(modelId, ActionListener.wrap(getModelStep::onResponse, exception -> {
if (exception instanceof ResourceNotFoundException) {
String errorMessage = String.format("Unable to delete model \"%s\". Model does not exist", modelId);
String errorMessage = String.format("Unable to delete model [%s]. Model does not exist", modelId);
ResourceNotFoundException resourceNotFoundException = new ResourceNotFoundException(errorMessage);
removeModelIdFromGraveyardOnFailure(modelId, resourceNotFoundException, getModelStep);
} else {
Expand All @@ -514,8 +512,8 @@ public void delete(String modelId, ActionListener<DeleteModelResponse> listener)
getModelStep.whenComplete(getModelResponse -> {
// If model is in Training state, fail delete model request
if (ModelState.TRAINING == getModelResponse.getModel().getModelMetadata().getState()) {
String errorMessage = String.format("Cannot delete model \"%s\". Model is still in training", modelId);
listener.onResponse(new DeleteModelResponse(modelId, FAILED, errorMessage));
String errorMessage = String.format("Cannot delete model [%s]. Model is still in training", modelId);
listener.onFailure(new DeleteModelWhenInTrainStateException(errorMessage));
return;
}

Expand Down Expand Up @@ -544,8 +542,8 @@ public void delete(String modelId, ActionListener<DeleteModelResponse> listener)
// If model is not deleted, remove modelId from model graveyard and return with error message
if (deleteResponse.getResult() != DocWriteResponse.Result.DELETED) {
updateModelGraveyardToDelete(modelId, true, unblockModelIdStep, Optional.empty());
String errorMessage = String.format("Model \" %s \" does not exist", modelId);
listener.onResponse(new DeleteModelResponse(modelId, deleteResponse.getResult().getLowercase(), errorMessage));
String errorMessage = String.format("Model [%s] does not exist", modelId);
listener.onFailure(new ResourceNotFoundException(errorMessage));
return;
}

Expand All @@ -569,7 +567,7 @@ public void delete(String modelId, ActionListener<DeleteModelResponse> listener)

unblockModelIdStep.whenComplete(acknowledgedResponse -> {
// After clearing the cache, if there are no errors return the response
listener.onResponse(new DeleteModelResponse(modelId, DELETED, null));
listener.onResponse(new DeleteModelResponse(modelId));

}, listener::onFailure);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

import org.opensearch.action.ActionResponse;
import org.opensearch.common.Nullable;
import org.opensearch.common.Strings;
import org.opensearch.common.io.stream.StreamInput;
import org.opensearch.common.io.stream.StreamOutput;
import org.opensearch.core.xcontent.ToXContentObject;
Expand All @@ -29,16 +28,40 @@ public class DeleteModelResponse extends ActionResponse implements ToXContentObj

public static final String RESULT = "result";
public static final String ERROR_MSG = "error";
private static final String DELETED = "deleted";
private final String modelID;
private final String result;
private final String errorMessage;

/**
* Ctor to build delete model response.
* @deprecated
* Returning errors through {@link DeleteModelResponse} should not be done. Instead, if there is an
* error, throw/return a suitable exception. Use {@link DeleteModelResponse#DeleteModelResponse(String)} to
* construct valid responses instead.
*
* @param modelID ID of the model that is deleted
* @param result Resulting action of the deletion.
* @param errorMessage Error message to be returned to the user
*/
@Deprecated
public DeleteModelResponse(String modelID, String result, @Nullable String errorMessage) {
this.modelID = modelID;
this.result = result;
this.errorMessage = errorMessage;
}

/**
* Ctor to build delete model response
*
* @param modelID ID of the model that is deleted
*/
public DeleteModelResponse(String modelID) {
this.modelID = modelID;
this.result = DELETED;
this.errorMessage = null;
}

public DeleteModelResponse(StreamInput in) throws IOException {
super(in);
this.modelID = in.readString();
Expand All @@ -63,16 +86,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
/* Response should look like below:
{
"model_id": "my_model_id"
"result": "not_found",
"error": "Model my_model_id doesn't exist"
"result": "deleted"
}
*/
builder.startObject();
builder.field(MODEL_ID, getModelID());
builder.field(RESULT, getResult());
if (Strings.hasText(errorMessage)) {
builder.field(ERROR_MSG, getErrorMessage());
}
builder.endObject();
return builder;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

package org.opensearch.knn.plugin.transport;

import lombok.extern.log4j.Log4j2;
import org.opensearch.action.ActionListener;
import org.opensearch.action.support.ActionFilters;
import org.opensearch.action.support.HandledTransportAction;
Expand All @@ -21,6 +22,7 @@
import org.opensearch.tasks.Task;
import org.opensearch.transport.TransportService;

@Log4j2
public class DeleteModelTransportAction extends HandledTransportAction<DeleteModelRequest, DeleteModelResponse> {

private final ModelDao modelDao;
Expand All @@ -37,7 +39,10 @@ public DeleteModelTransportAction(TransportService transportService, ActionFilte
protected void doExecute(Task task, DeleteModelRequest request, ActionListener<DeleteModelResponse> listener) {
ThreadContextHelper.runWithStashedThreadContext(client, () -> {
String modelID = request.getModelID();
modelDao.delete(modelID, listener);
modelDao.delete(modelID, ActionListener.wrap(listener::onResponse, e -> {
log.error(e);
listener.onFailure(e);
}));
});
}
}
43 changes: 25 additions & 18 deletions src/test/java/org/opensearch/knn/indices/ModelDaoTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.junit.BeforeClass;
import org.opensearch.ExceptionsHelper;
import org.opensearch.ResourceAlreadyExistsException;
import org.opensearch.ResourceNotFoundException;
import org.opensearch.action.ActionListener;
import org.opensearch.action.DocWriteResponse;
import org.opensearch.action.StepListener;
Expand All @@ -32,6 +33,7 @@
import org.opensearch.index.IndexNotFoundException;
import org.opensearch.index.engine.VersionConflictEngineException;
import org.opensearch.knn.KNNSingleNodeTestCase;
import org.opensearch.knn.common.exception.DeleteModelWhenInTrainStateException;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.util.KNNEngine;
import org.opensearch.knn.plugin.transport.DeleteModelResponse;
Expand Down Expand Up @@ -500,10 +502,13 @@ public void testDelete() throws IOException, InterruptedException {
int dimension = 2;

final CountDownLatch inProgressLatch = new CountDownLatch(1);
ActionListener<DeleteModelResponse> deleteModelIndexDoesNotExistListener = ActionListener.wrap(response -> {
assertEquals(FAILED, response.getResult());
inProgressLatch.countDown();
}, exception -> fail("Unable to delete the model: " + exception));
ActionListener<DeleteModelResponse> deleteModelIndexDoesNotExistListener = ActionListener.wrap(
response -> fail("Deleting model when model index does not exist should throw ResourceNotFoundException"),
exception -> {
assertTrue(exception instanceof ResourceNotFoundException);
inProgressLatch.countDown();
}
);
// model index doesnt exist
modelDao.delete(modelId, deleteModelIndexDoesNotExistListener);
assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS));
Expand All @@ -512,25 +517,27 @@ public void testDelete() throws IOException, InterruptedException {

// Model does not exist
final CountDownLatch inProgressLatch1 = new CountDownLatch(1);
ActionListener<DeleteModelResponse> deleteModelDoesNotExistListener = ActionListener.wrap(Assert::assertNull, exception -> {
assertNotNull(exception);
assertTrue(exception.getMessage().contains(modelId));
assertTrue(exception.getMessage().contains("Model does not exist"));
assertFalse(modelDao.isModelInGraveyard(modelId));
inProgressLatch1.countDown();
});
ActionListener<DeleteModelResponse> deleteModelDoesNotExistListener = ActionListener.wrap(
response -> fail("Deleting model when model does not exist should throw ResourceNotFoundException"),
exception -> {
assertTrue(exception instanceof ResourceNotFoundException);
assertFalse(modelDao.isModelInGraveyard(modelId));
inProgressLatch1.countDown();
}
);

modelDao.delete(modelId, deleteModelDoesNotExistListener);
assertTrue(inProgressLatch1.await(60, TimeUnit.SECONDS));

final CountDownLatch inProgressLatch2 = new CountDownLatch(1);
ActionListener<DeleteModelResponse> deleteModelTrainingListener = ActionListener.wrap(response -> {
assertEquals(modelId, response.getModelID());
assertEquals(FAILED, response.getResult());
String errorMessage = String.format("Cannot delete model \"%s\". Model is still in training", modelId);
assertEquals(errorMessage, response.getErrorMessage());
inProgressLatch2.countDown();
}, exception -> fail("Unable to delete model: " + exception));
ActionListener<DeleteModelResponse> deleteModelTrainingListener = ActionListener.wrap(
response -> fail("Deleting model when model does not exist should throw ResourceNotFoundException"),
exception -> {
assertTrue(exception instanceof DeleteModelWhenInTrainStateException);
assertFalse(modelDao.isModelInGraveyard(modelId));
inProgressLatch2.countDown();
}
);

// model id exists and model is still in Training
Model model = new Model(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import org.opensearch.common.xcontent.XContentType;
import org.opensearch.knn.KNNRestTestCase;
import org.opensearch.knn.plugin.KNNPlugin;
import org.opensearch.knn.plugin.transport.DeleteModelResponse;
import org.opensearch.rest.RestStatus;

import java.util.List;
Expand Down Expand Up @@ -107,23 +106,8 @@ public void testDeleteTrainingModel() throws Exception {
String deleteModelRestURI = String.join("/", KNNPlugin.KNN_BASE_URI, MODELS, modelId);
Request deleteModelRequest = new Request("DELETE", deleteModelRestURI);

Response deleteModelResponse = client().performRequest(deleteModelRequest);
assertEquals(
deleteModelRequest.getEndpoint() + ": failed",
RestStatus.OK,
RestStatus.fromCode(deleteModelResponse.getStatusLine().getStatusCode())
);

responseBody = EntityUtils.toString(deleteModelResponse.getEntity());
assertNotNull(responseBody);

responseMap = createParser(XContentType.JSON.xContent(), responseBody).map();

assertEquals(modelId, responseMap.get(MODEL_ID));
assertEquals("failed", responseMap.get(DeleteModelResponse.RESULT));

String errorMessage = String.format("Cannot delete model \"%s\". Model is still in training", modelId);
assertEquals(errorMessage, responseMap.get(DeleteModelResponse.ERROR_MSG));
ResponseException ex = expectThrows(ResponseException.class, () -> client().performRequest(deleteModelRequest));
assertEquals(RestStatus.CONFLICT.getStatus(), ex.getResponse().getStatusLine().getStatusCode());

// need to wait for training operation as it's required for after test cleanup
assertTrainingSucceeds(modelId, NUM_OF_ATTEMPTS, DELAY_MILLI_SEC);
Expand All @@ -136,7 +120,7 @@ public void testDeleteModelFailsInvalid() throws Exception {
Request request = new Request("DELETE", restURI);

ResponseException ex = expectThrows(ResponseException.class, () -> client().performRequest(request));
assertTrue(ex.getMessage().contains(modelId));
assertEquals(RestStatus.NOT_FOUND.getStatus(), ex.getResponse().getStatusLine().getStatusCode());
}

// Test Train Model -> Delete Model -> Train Model with same modelId
Expand Down
Loading