elastic · arteam · Oct 18, 2021 · Oct 14, 2021 · Oct 18, 2021 · Oct 18, 2021
diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ClusterClientIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ClusterClientIT.java
@@ -37,13 +37,13 @@
 import org.elasticsearch.common.compress.CompressedXContent;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.ByteSizeUnit;
-import org.elasticsearch.core.TimeValue;
-import org.elasticsearch.xcontent.XContentType;
 import org.elasticsearch.common.xcontent.support.XContentMapValues;
+import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.indices.recovery.RecoverySettings;
 import org.elasticsearch.rest.RestStatus;
 import org.elasticsearch.transport.RemoteClusterService;
 import org.elasticsearch.transport.SniffConnectionStrategy;
+import org.elasticsearch.xcontent.XContentType;
 
 import java.io.IOException;
 import java.util.HashMap;
@@ -316,7 +316,7 @@ public void testClusterHealthNotFoundIndex() throws IOException {
         assertThat(response.getStatus(), equalTo(ClusterHealthStatus.RED));
         assertNoIndices(response);
         assertWarnings("The HTTP status code for a cluster health timeout will be changed from 408 to 200 in a " +
-            "future version. Set the [es.cluster_health.request_timeout_200] system property to [true] to suppress this message and " +
+            "future version. Set the [return_200_for_cluster_health_timeout] query parameter to [true] to suppress this message and " +
             "opt in to the future behaviour now.");
     }
 

diff --git a/docs/reference/cluster/health.asciidoc b/docs/reference/cluster/health.asciidoc
@@ -97,6 +97,11 @@ include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms]
     provided or better, i.e. `green` > `yellow` > `red`. By default, will not
     wait for any status.
 
+`return_200_for_cluster_health_timeout`::
+   (Optional, Boolean) A boolean value which controls whether to return HTTP 200
+   status code instead of HTTP 408 in case of a cluster health timeout from
+   the server side. Defaults to false.
+
 [[cluster-health-api-response-body]]
 ==== {api-response-body-title}
 

diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.health.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.health.json
@@ -102,6 +102,10 @@
           "red"
         ],
         "description":"Wait until cluster is in a specific state"
+      },
+      "return_200_for_cluster_health_timeout":{
+        "type":"boolean",
+        "description":"Whether to return HTTP 200 instead of 408 in case of a cluster health timeout from the server side"
       }
     }
   }

diff --git a/...-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml b/...-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml
@@ -35,3 +35,25 @@
   - match:     { initializing_shards:     0 }
   - match:     { unassigned_shards:       0 }
   - gte:       { number_of_pending_tasks: 0 }
+
+---
+"cluster health request timeout with 200 response code":
+  - skip:
+      version: " - 7.99.99"
+      reason: "return_200_for_cluster_health_timeout exists only in 8.0.0; re-enable in 7.16+ when back-ported"
+  - do:
+      cluster.health:
+        timeout: 1ms
+        wait_for_active_shards: 5
+        return_200_for_cluster_health_timeout: true
+
+  - is_true:   cluster_name
+  - is_true:   timed_out
+  - gte:       { number_of_nodes:         1 }
+  - gte:       { number_of_data_nodes:    1 }
+  - match:     { active_primary_shards:   0 }
+  - match:     { active_shards:           0 }
+  - match:     { relocating_shards:       0 }
+  - match:     { initializing_shards:     0 }
+  - match:     { unassigned_shards:       0 }
+  - gte:       { number_of_pending_tasks: 0 }
diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthRequest.java
@@ -35,6 +35,8 @@ public class ClusterHealthRequest extends MasterNodeReadRequest<ClusterHealthReq
     private ActiveShardCount waitForActiveShards = ActiveShardCount.NONE;
     private String waitForNodes = "";
     private Priority waitForEvents = null;
+    private boolean return200ForClusterHealthTimeout;
+
     /**
      * Only used by the high-level REST Client. Controls the details level of the health information returned.
      * The default value is 'cluster'.
@@ -67,6 +69,9 @@ public ClusterHealthRequest(StreamInput in) throws IOException {
         } else {
             indicesOptions = IndicesOptions.lenientExpandOpen();
         }
+        if (in.getVersion().onOrAfter(Version.V_8_0_0)) {
+            return200ForClusterHealthTimeout = in.readBoolean();
+        }
     }
 
     @Override
@@ -97,6 +102,11 @@ public void writeTo(StreamOutput out) throws IOException {
         if (out.getVersion().onOrAfter(Version.V_7_2_0)) {
             indicesOptions.writeIndicesOptions(out);
         }
+        if (out.getVersion().onOrAfter(Version.V_8_0_0)) {
+            out.writeBoolean(return200ForClusterHealthTimeout);
+        } else if (return200ForClusterHealthTimeout) {
+            throw new IllegalArgumentException("Can't fix response code in a cluster involving nodes with version " + out.getVersion());
+        }
     }
 
     @Override
@@ -240,6 +250,18 @@ public Priority waitForEvents() {
         return this.waitForEvents;
     }
 
+    public boolean doesReturn200ForClusterHealthTimeout() {
+        return return200ForClusterHealthTimeout;
+    }
+
+    /**
+     * Sets whether to return HTTP 200 status code instead of HTTP 408 in case of a
+     * cluster health timeout from the server side.
+     */
+    public void return200ForClusterHealthTimeout(boolean return200ForClusterHealthTimeout) {
+        this.return200ForClusterHealthTimeout = return200ForClusterHealthTimeout;
+    }
+
     /**
      * Set the level of detail for the health information to be returned.
      * Only used by the high-level REST Client.

diff --git a/...er/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponse.java b/...er/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponse.java
@@ -8,24 +8,25 @@
 
 package org.elasticsearch.action.admin.cluster.health;
 
+import org.elasticsearch.Version;
 import org.elasticsearch.action.ActionResponse;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.health.ClusterHealthStatus;
 import org.elasticsearch.cluster.health.ClusterIndexHealth;
 import org.elasticsearch.cluster.health.ClusterStateHealth;
-import org.elasticsearch.common.logging.DeprecationLogger;
-import org.elasticsearch.xcontent.ParseField;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.logging.DeprecationLogger;
+import org.elasticsearch.common.xcontent.StatusToXContentObject;
 import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.rest.RestStatus;
+import org.elasticsearch.rest.action.search.RestSearchAction;
 import org.elasticsearch.xcontent.ConstructingObjectParser;
 import org.elasticsearch.xcontent.ObjectParser;
-import org.elasticsearch.common.xcontent.StatusToXContentObject;
+import org.elasticsearch.xcontent.ParseField;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentParser;
-import org.elasticsearch.rest.RestStatus;
-import org.elasticsearch.rest.action.search.RestSearchAction;
 
 import java.io.IOException;
 import java.util.HashMap;
@@ -101,10 +102,10 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo
 
     private static final ObjectParser.NamedObjectParser<ClusterIndexHealth, Void> INDEX_PARSER =
         (XContentParser parser, Void context, String index) -> ClusterIndexHealth.innerFromXContent(parser, index);
-    private static final String ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY = "es.cluster_health.request_timeout_200";
+    static final String ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY = "return_200_for_cluster_health_timeout";
     static final String CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG = "The HTTP status code for a cluster health timeout " +
         "will be changed from 408 to 200 in a future version. Set the [" + ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY + "] " +
-        "system property to [true] to suppress this message and opt in to the future behaviour now.";
+        "query parameter to [true] to suppress this message and opt in to the future behaviour now.";
 
     static {
         // ClusterStateHealth fields
@@ -137,15 +138,7 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo
     private boolean timedOut = false;
     private ClusterStateHealth clusterStateHealth;
     private ClusterHealthStatus clusterHealthStatus;
-    private boolean esClusterHealthRequestTimeout200 = readEsClusterHealthRequestTimeout200FromProperty();
-
-    public ClusterHealthResponse() {
-    }
-
-    /** For the testing of opting in for the 200 status code without setting a system property */
-    ClusterHealthResponse(boolean esClusterHealthRequestTimeout200) {
-        this.esClusterHealthRequestTimeout200 = esClusterHealthRequestTimeout200;
-    }
+    private boolean return200ForClusterHealthTimeout;
 
     public ClusterHealthResponse(StreamInput in) throws IOException {
         super(in);
@@ -157,22 +150,29 @@ public ClusterHealthResponse(StreamInput in) throws IOException {
         numberOfInFlightFetch = in.readInt();
         delayedUnassignedShards= in.readInt();
         taskMaxWaitingTime = in.readTimeValue();
+        if (in.getVersion().onOrAfter(Version.V_8_0_0)) {
+            return200ForClusterHealthTimeout = in.readBoolean();
+        }
     }
 
     /** needed for plugins BWC */
-    public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState) {
-        this(clusterName, concreteIndices, clusterState, -1, -1, -1, TimeValue.timeValueHours(0));
+    public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState,
+                                 boolean return200ForServerTimeout) {
+        this(clusterName, concreteIndices, clusterState, -1, -1, -1, TimeValue.timeValueHours(0),
+            return200ForServerTimeout);
     }
 
     public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState, int numberOfPendingTasks,
-                                 int numberOfInFlightFetch, int delayedUnassignedShards, TimeValue taskMaxWaitingTime) {
+                                 int numberOfInFlightFetch, int delayedUnassignedShards, TimeValue taskMaxWaitingTime,
+                                 boolean return200ForServerTimeout) {
         this.clusterName = clusterName;
         this.numberOfPendingTasks = numberOfPendingTasks;
         this.numberOfInFlightFetch = numberOfInFlightFetch;
         this.delayedUnassignedShards = delayedUnassignedShards;
         this.taskMaxWaitingTime = taskMaxWaitingTime;
         this.clusterStateHealth = new ClusterStateHealth(clusterState, concreteIndices);
         this.clusterHealthStatus = clusterStateHealth.getStatus();
+        this.return200ForClusterHealthTimeout = return200ForServerTimeout;
     }
 
     /**
@@ -304,6 +304,11 @@ public void writeTo(StreamOutput out) throws IOException {
         out.writeInt(numberOfInFlightFetch);
         out.writeInt(delayedUnassignedShards);
         out.writeTimeValue(taskMaxWaitingTime);
+        if (out.getVersion().onOrAfter(Version.V_8_0_0)) {
+            out.writeBoolean(return200ForClusterHealthTimeout);
+        } else if (return200ForClusterHealthTimeout) {
+            throw new IllegalArgumentException("Can't fix response code in a cluster involving nodes with version " + out.getVersion());
+        }
     }
 
     @Override
@@ -316,7 +321,7 @@ public RestStatus status() {
         if (isTimedOut() == false) {
             return RestStatus.OK;
         }
-        if (esClusterHealthRequestTimeout200) {
+        if (return200ForClusterHealthTimeout) {
             return RestStatus.OK;
         } else {
             deprecationLogger.compatibleCritical("cluster_health_request_timeout", CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG);
@@ -381,17 +386,4 @@ public int hashCode() {
         return Objects.hash(clusterName, numberOfPendingTasks, numberOfInFlightFetch, delayedUnassignedShards, taskMaxWaitingTime,
                 timedOut, clusterStateHealth, clusterHealthStatus);
     }
-
-    private static boolean readEsClusterHealthRequestTimeout200FromProperty() {
-        String property = System.getProperty(ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY);
-        if (property == null) {
-            return false;
-        }
-        if (Boolean.parseBoolean(property)) {
-            return true;
-        } else {
-            throw new IllegalArgumentException(ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY + " can only be unset or [true] but was ["
-                + property + "]");
-        }
-    }
 }
diff --git a/...main/java/org/elasticsearch/action/admin/cluster/health/TransportClusterHealthAction.java b/...main/java/org/elasticsearch/action/admin/cluster/health/TransportClusterHealthAction.java
@@ -30,8 +30,8 @@
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.inject.Inject;
-import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.common.util.CollectionUtils;
+import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.index.IndexNotFoundException;
 import org.elasticsearch.node.NodeClosedException;
 import org.elasticsearch.tasks.Task;
@@ -225,7 +225,8 @@ private enum TimeoutState {
 
     private ClusterHealthResponse getResponse(final ClusterHealthRequest request, ClusterState clusterState,
                                               final int waitFor, TimeoutState timeoutState) {
-        ClusterHealthResponse response = clusterHealth(request, clusterState, clusterService.getMasterService().numberOfPendingTasks(),
+        ClusterHealthResponse response = clusterHealth(request, clusterState,
+            clusterService.getMasterService().numberOfPendingTasks(),
             allocationService.getNumberOfInFlightFetches(), clusterService.getMasterService().getMaxTaskWaitTime());
         int readyCounter = prepareResponse(request, response, clusterState, indexNameExpressionResolver);
         boolean valid = (readyCounter == waitFor);
@@ -324,8 +325,8 @@ static int prepareResponse(final ClusterHealthRequest request, final ClusterHeal
     }
 
 
-    private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, ClusterState clusterState, int numberOfPendingTasks,
-                                                int numberOfInFlightFetch, TimeValue pendingTaskTimeInQueue) {
+    private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, ClusterState clusterState,
+                                                int numberOfPendingTasks, int numberOfInFlightFetch, TimeValue pendingTaskTimeInQueue) {
         if (logger.isTraceEnabled()) {
             logger.trace("Calculating health based on state version [{}]", clusterState.version());
         }
@@ -337,12 +338,13 @@ private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, Cluste
             // one of the specified indices is not there - treat it as RED.
             ClusterHealthResponse response = new ClusterHealthResponse(clusterState.getClusterName().value(), Strings.EMPTY_ARRAY,
                 clusterState, numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState),
-                pendingTaskTimeInQueue);
+                pendingTaskTimeInQueue, request.doesReturn200ForClusterHealthTimeout());
             response.setStatus(ClusterHealthStatus.RED);
             return response;
         }
 
-        return new ClusterHealthResponse(clusterState.getClusterName().value(), concreteIndices, clusterState, numberOfPendingTasks,
-                numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue);
+        return new ClusterHealthResponse(clusterState.getClusterName().value(), concreteIndices, clusterState,
+            numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue,
+            request.doesReturn200ForClusterHealthTimeout());
     }
 }
diff --git a/...er/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterHealthAction.java b/...er/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterHealthAction.java
@@ -81,6 +81,9 @@ public static ClusterHealthRequest fromRequest(final RestRequest request) {
         if (request.param("wait_for_events") != null) {
             clusterHealthRequest.waitForEvents(Priority.valueOf(request.param("wait_for_events").toUpperCase(Locale.ROOT)));
         }
+        clusterHealthRequest.return200ForClusterHealthTimeout(request.paramAsBoolean(
+            "return_200_for_cluster_health_timeout",
+            clusterHealthRequest.doesReturn200ForClusterHealthTimeout()));
         return clusterHealthRequest;
     }
 

diff --git a/...rc/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthRequestTests.java b/...rc/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthRequestTests.java
@@ -43,6 +43,7 @@ public void testSerialize() throws Exception {
         assertThat(cloneRequest.waitForEvents(), equalTo(originalRequest.waitForEvents()));
         assertIndicesEquals(cloneRequest.indices(), originalRequest.indices());
         assertThat(cloneRequest.indicesOptions(), equalTo(originalRequest.indicesOptions()));
+        assertThat(cloneRequest.doesReturn200ForClusterHealthTimeout(), equalTo(originalRequest.doesReturn200ForClusterHealthTimeout()));
     }
 
     public void testRequestReturnsHiddenIndicesByDefault() {

diff --git a/.../test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java b/.../test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java
@@ -20,10 +20,10 @@
 import org.elasticsearch.common.io.stream.Writeable;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.TimeValue;
-import org.elasticsearch.xcontent.ToXContent;
-import org.elasticsearch.xcontent.XContentParser;
 import org.elasticsearch.rest.RestStatus;
 import org.elasticsearch.test.AbstractSerializingTestCase;
+import org.elasticsearch.xcontent.ToXContent;
+import org.elasticsearch.xcontent.XContentParser;
 import org.hamcrest.Matchers;
 
 import java.io.IOException;
@@ -43,7 +43,7 @@ public class ClusterHealthResponsesTests extends AbstractSerializingTestCase<Clu
     private final ClusterHealthRequest.Level level = randomFrom(ClusterHealthRequest.Level.values());
 
     public void testIsTimeout() {
-        ClusterHealthResponse res = new ClusterHealthResponse();
+        ClusterHealthResponse res = new ClusterHealthResponse("", new String[]{}, ClusterState.EMPTY_STATE, false);
         for (int i = 0; i < 5; i++) {
             res.setTimedOut(randomBoolean());
             if (res.isTimedOut()) {
@@ -56,7 +56,7 @@ public void testIsTimeout() {
     }
 
     public void testTimeoutReturns200IfOptedIn() {
-        ClusterHealthResponse res = new ClusterHealthResponse(true);
+        ClusterHealthResponse res = new ClusterHealthResponse("", new String[]{}, ClusterState.EMPTY_STATE, true);
         for (int i = 0; i < 5; i++) {
             res.setTimedOut(randomBoolean());
             assertEquals(RestStatus.OK, res.status());
@@ -70,7 +70,7 @@ public void testClusterHealth() throws IOException {
         int delayedUnassigned = randomIntBetween(0, 200);
         TimeValue pendingTaskInQueueTime = TimeValue.timeValueMillis(randomIntBetween(1000, 100000));
         ClusterHealthResponse clusterHealth = new ClusterHealthResponse("bla", new String[] {Metadata.ALL},
-            clusterState, pendingTasks, inFlight, delayedUnassigned, pendingTaskInQueueTime);
+            clusterState, pendingTasks, inFlight, delayedUnassigned, pendingTaskInQueueTime, false);
         clusterHealth = maybeSerialize(clusterHealth);
         assertClusterHealth(clusterHealth);
         assertThat(clusterHealth.getNumberOfPendingTasks(), Matchers.equalTo(pendingTasks));