Log ingestion API #88181
[role="xpack"] | ||
[[logs-api]] | ||
== Log ingestion API | ||
|
||
experimental::[] | ||
|
||
Provides a simple JSON API to ingest log events into {es}. | ||
|
||
[discrete] | ||
[[logs-api-request]] | ||
=== {api-request-title} | ||
|
||
`POST /_logs` | ||
|
||
`POST /_logs/<dataset>` | ||
|
||
`POST /_logs/<dataset>/<namespace>` | ||
|
||
[discrete] | ||
[[logs-api-prereqs]] | ||
=== {api-prereq-title} | ||
* If the {es} {security-features} are enabled, you must have the `create` | ||
<<privileges-list-indices,index privileges>> for the target data stream. | ||
* As the target stream depends on routing rules and the data provided in the log events, | ||
it's recommended to grant permissions for `logs-*-*`. | ||
* Automatic data stream creation requires a matching index template with data | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As the |
||
stream enabled. See <<set-up-a-data-stream>>. | ||
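
For example, a minimal sketch of a role that grants the `create` privilege on all matching logs data streams, using the create roles API (the role name is illustrative):

[source,console]
----
POST /_security/role/logs_writer
{
  "indices": [
    {
      "names": [ "logs-*-*" ],
      "privileges": [ "create" ]
    }
  ]
}
----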

[discrete]
[[logs-api-desc]]
=== {api-description-title}

Provides a way to ingest log events into {es}, similar to the <<docs-bulk, Bulk API>>.

The log events are specified in the request body using a newline-delimited JSON (NDJSON) structure.

The events are indexed into the `logs-<dataset>-<namespace>` <<data-streams, data stream>>,
according to the dataset and namespace parameters, which can be provided globally or on a per-event basis.
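
For instance, under these routing rules the following requests (the dataset and namespace values are illustrative) would target `logs-generic-default`, `logs-nginx.access-default`, and `logs-nginx.access-production`, respectively:

[source,console]
----
POST /_logs
{"message": "goes to logs-generic-default"}

POST /_logs/nginx.access
{"message": "goes to logs-nginx.access-default"}

POST /_logs/nginx.access/production
{"message": "goes to logs-nginx.access-production"}
----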

The endpoint is designed in a way that logs are never dropped, as long as the cluster has enough capacity.

If an error happens during ingestion,
the logs are sent to the `logs-dlq-<namespace>` data stream that acts as a dead letter queue for failed events.
However, log ingestion should rarely fail as the mappings in the built-in index template for the `logs-*-*` data streams are designed to minimize mapping conflicts.
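
To inspect events that ended up in the dead letter queue, you can search the corresponding data stream; a sketch, assuming the `default` namespace:

[source,console]
----
POST logs-dlq-default/_search
----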

A <<logs-api-request-body, couple of fields>> from the {ecs-ref}[Elastic Common Schema (ECS)] that are commonly used to search or filter logs are indexed by default.
All other fields are not indexed by default.
However, you can still add any top-level fields and use them in searches and aggregations, as the default index template for logs
<<dynamic-mapping-runtime-fields, maps dynamic fields as runtime fields>>.

For custom fields that are frequently used in searches or aggregations, you might want to leverage the speed benefits of indexing the field.
This comes at the expense of a larger index size and more processing at ingest time.
To index a custom field, <<create-index-template, create an index template>> for your dataset `logs-<dataset>-*` based on the built-in index template for `logs-*-*` and add your custom field to the mappings.

// Reviewer: Ideally that is a part we would have an API for; the user should not have to think about templates etc. Same for the rollover below.
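
A minimal sketch of such a template; the template name, priority, and the `session_id` field are illustrative, and the exact way to base it on the built-in `logs-*-*` template may vary:

[source,console]
----
PUT _index_template/logs-myapp
{
  "index_patterns": [ "logs-myapp-*" ],
  "data_stream": {},
  "priority": 500,
  "template": {
    "mappings": {
      "properties": {
        "session_id": { "type": "keyword" }
      }
    }
  }
}
----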

To immediately apply the setting, <<manually-roll-over-a-data-stream, roll over the data stream>>.
This affects any new data added to the stream after the rollover.
However, it does not affect the data stream's existing backing indices or existing data.
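
For example, assuming the `logs-myapp-default` data stream already exists:

[source,console]
----
POST logs-myapp-default/_rollover
----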

All fields, aside from the `@timestamp` field, are configured to <<ignore-malformed, ignore malformed>> values.
This means that if a log event contains a field whose type is incompatible with the type of the field that exists in the mapping,
{es} will ignore the field instead of rejecting the whole document.

// Reviewer: Will the field still be available in ...?
// Reviewer: Yes

For example, when a string is provided for a field that is mapped as an integer, the field is ignored.
Note that this currently doesn't apply to object/scalar mismatches, such as `"foo": "bar"` vs `"foo.bar": "baz"`.
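
A sketch of such a malformed value, assuming `process.pid` is mapped as a numeric type; the event is still indexed, and the malformed field remains in `_source`:

[source,console]
----
POST _logs/myapp
{"@timestamp": "2016-05-23T08:05:34.853Z", "message": "Hello", "process.pid": "not-a-number"}
----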

[discrete]
[[logs-api-path-params]]
=== {api-path-parms-title}

{ecs-ref}/ecs-data_stream.html#field-data-stream-dataset[`data_stream.dataset`]::
(Optional, string)
Defaults to `generic`.
Describes the ingested data and its structure.
It is highly recommended to provide a value for this so that you can add structure to your logs after the fact.
Example: `nginx.access`.

{ecs-ref}/ecs-data_stream.html#field-data-stream-namespace[`data_stream.namespace`]::
(Optional, string)
Defaults to `default`.
A user-configurable arbitrary grouping, such as an environment (dev, prod, or qa), a team, or a strategic business unit.

[discrete]
[[logs-api-query-params]]
=== {api-query-parms-title}

Any provided query parameter will be added to each log line.
For example, `/_logs?service.name=myapp` will add `"service.name": "myapp"` to all logs.

// Reviewer: For later: I have the suspicion we will need the query params to set some other flags than adding data. Even though it looks very convenient, I wonder if this is the expected way of adding data. As we follow bulk requests, it does not fit into the body, but it would likely be a better place for it?
// Reviewer: What I had in mind is that these flags would be prefixed by an underscore.
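
A sketch with multiple query parameters; each one is merged into every event in the request (the parameter values are illustrative):

[source,console]
----
POST /_logs?service.name=myapp&service.environment=production
{"@timestamp": "2016-05-23T08:05:34.853Z", "message": "Hello"}
----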

[discrete]
[[logs-api-request-body]]
=== {api-request-body-title}

The request body contains a newline-delimited list of log events to ingest.
The individual events don't have any required fields and can contain arbitrary JSON content.
There is no required structure for the log events, and you can add any top-level fields.
However, it is recommended to follow the {ecs-ref}[Elastic Common Schema (ECS)] to structure your logs.

TIP: Use the {ecs-logging-ref}/intro.html[ECS logging libraries] to create ECS-compliant JSON logs.

Only the following fields, which are commonly used for searching, filtering, and correlating logs, are indexed by default:

* {ecs-ref}/ecs-base.html#field-timestamp[`@timestamp`] +
If not provided, it is set to the current time.
Can be provided as <<epoch-millis,`epoch_millis`>> or <<strict-date-time,`strict_date_optional_time`>> by default.
* {ecs-ref}/ecs-data_stream.html#field-data-stream-dataset[`data_stream.dataset`] +
Overrides the `data_stream.dataset` <<logs-api-path-params, path parameter>> on a per-event basis.
* {ecs-ref}/ecs-data_stream.html#field-data-stream-namespace[`data_stream.namespace`] +
Overrides the `data_stream.namespace` <<logs-api-path-params, path parameter>> on a per-event basis.
* {ecs-ref}/ecs-base.html#field-message[`message`]
* {ecs-ref}/ecs-log.html#field-log-level[`log.level`]
* {ecs-ref}/ecs-log.html#field-log-logger[`log.logger`]
* {ecs-ref}/ecs-service.html#field-service-name[`service.name`]
* {ecs-ref}/ecs-service.html#field-service-environment[`service.environment`]
* {ecs-ref}/ecs-service.html#field-service-version[`service.version`]
* {ecs-ref}/ecs-tracing.html#field-trace-id[`trace.id`]
* {ecs-ref}/ecs-tracing.html#field-transaction-id[`transaction.id`]
* {ecs-ref}/ecs-tracing.html#field-span-id[`span.id`]
* {ecs-ref}/ecs-process.html#field-process-pid[`process.pid`]
* {ecs-ref}/ecs-process.html#field-process-thread-name[`process.thread.name`]
* {ecs-ref}/ecs-error.html#field-error-type[`error.type`]
* {ecs-ref}/ecs-error.html#field-error-message[`error.message`]
* {ecs-ref}/ecs-event.html#field-event-dataset[`event.dataset`]
* {ecs-ref}/ecs-cloud.html#field-cloud-provider[`cloud.provider`]
* {ecs-ref}/ecs-cloud.html#field-cloud-availability-zone[`cloud.availability_zone`]
* {ecs-ref}/ecs-cloud.html#field-cloud-region[`cloud.region`]
* {ecs-ref}/ecs-host.html#field-host-hostname[`host.hostname`]
* {ecs-ref}/ecs-host.html#field-host-name[`host.name`]
* {ecs-ref}/ecs-container.html#field-container-id[`container.id`]
* {ecs-ref}/ecs-container.html#field-container-name[`container.name`]
* {ecs-ref}/ecs-orchestrator.html#field-orchestrator-namespace[`orchestrator.namespace`]
* {ecs-ref}/ecs-orchestrator.html#field-orchestrator-cluster-id[`orchestrator.cluster.id`]
* {ecs-ref}/ecs-orchestrator.html#field-orchestrator-cluster-name[`orchestrator.cluster.name`]
* {ecs-ref}/ecs-orchestrator.html#field-orchestrator-resource-id[`orchestrator.resource.id`]
* {ecs-ref}/ecs-orchestrator.html#field-orchestrator-resource-name[`orchestrator.resource.name`]

// Reviewer: The list is pretty long. Maybe we can get it even shorter?
// Reviewer: Shorter, because it takes up a lot of space in the docs or because you think some of the fields should not be indexed by default? If so, which ones and why?

Dotted field names are expanded to objects so that they can be used interchangeably with nested objects. For example, the following documents are treated equally: `{"log.level": "INFO"}` and `{"log": { "level": "INFO"} }`.

// Reviewer: Should we directly start to use subobjects false to prevent conflicts?
// Reviewer: I think we should but there are some missing features. See: #88934
// Reviewer: Another reason for expanding dots is to be able to access the fields in a consistent way in ingest pipelines and scripts so that you don't need to have something like ...
// Reviewer: That argument can be taken both ways. I think the important bit is that it is consistent, either all flat or all expanded, so that we don't have something like ...

`_metadata`::
(Optional, object)
Marks this line as a metadata line.
Provides metadata that will be merged into subsequent events.
If a metadata event is provided as the first line, the metadata is added to all log events.
If a metadata event is provided after the first line, the metadata is added to all subsequent log events until another metadata event is provided.
This way you can easily add global metadata and send logs from multiple datasets in a single request, providing dataset-specific metadata.

[discrete]
[[logs-api-response-body]]
==== {api-response-body-title}

The log ingestion API's response body is always empty.

Status codes:

* 202 Accepted: The log events have been received and are processed in the background. They should be searchable after a short while.
* 500 Internal Server Error: There was an error while processing the log events. Some logs may have been lost.

// Reviewer: What is the expected client behaviour in this case? Resend all data?

[discrete]
[[logs-api-example]]
=== {api-examples-title}

Ingests a single log into the `logs-myapp-default` data stream.

[source,console]
------------------------------------------------------------
POST _logs/myapp?service.name=myapp <1>
{"@timestamp": 1463990734853, "message": "Hello", "custom": "value"} <2>
------------------------------------------------------------

<1> Provides global metadata that applies to all log events in the request via query parameters.
<2> Specifies the timestamp in milliseconds since epoch.

After a short while, the logs become searchable.
Even though `custom` is not among the <<logs-api-request-body, list of fields that are indexed by default>>,
you can use it in searches and aggregations as it is mapped as a <<dynamic-mapping-runtime-fields, dynamic runtime field>>.

////
[source,console]
----
POST logs-myapp-default/_refresh
----
// TEST[continued]
// commented out to avoid documenting that a _refresh will always be sufficient to make the logs searchable
// in the future, logs may be buffered and asynchronously processed
////

[source,console]
------------------------------------------------------------
POST logs-myapp-default/_search?q=custom:value
------------------------------------------------------------
// TEST[continued]

The API returns the following response:

[source,console-result]
----
{
  "took": 5,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": 1.0,
    "hits": [
      {
        "_index": ".ds-logs-myapp-default-2016.05.23-000001",
        "_id": "FKgQT4IBWsM7OYMsIp0N",
        "_score": 1.0,
        "_source": {
          "@timestamp": 1463990734853,
          "message": "Hello",
          "custom": "value",
          "service": {
            "name": "myapp"
          },
          "data_stream": {
            "type": "logs",
            "dataset": "myapp",
            "namespace": "default"
          }
        }
      }
    ]
  }
}
----
// TESTRESPONSE[s/"took": 5/"took": $body.took/]
// TESTRESPONSE[s/"_index": ".*"/"_index": $body.hits.hits.0._index/]
// TESTRESPONSE[s/"_id": ".*"/"_id": $body.hits.hits.0._id/]
// TESTRESPONSE[s/"_source": \{\n/"_source": \{\n"error_trace": "true",\n/]
// The test system adds an error_trace:true parameter to all requests,
// including the logs API, which interprets it as global metadata that's added to every event

'''

Ingests a single log into the `logs-myapp-default` data stream, this time providing the global metadata via a metadata event.

[source,console]
------------------------------------------------------------
POST _logs/myapp
{"_metadata": {"service.name": "myapp"}} <1>
{"@timestamp": "2016-05-23T08:05:34.853Z", "message": "Hello World"} <2>
------------------------------------------------------------

<1> Provides global metadata that applies to all log events in the request via a global metadata event.
<2> Specifies the timestamp as an ISO date string.
|
||
''' | ||
|
||
Ingests two log events into the `logs-myapp-default` and `logs-my_other_app-default` data stream, respectively. | ||
Provides metadata via local metadata events. | ||
|
||
[source,console] | ||
------------------------------------------------------------ | ||
POST _logs | ||
{"_metadata": {"service.name": "myapp"}} <1> | ||
{"_metadata": {"data_stream.dataset": "myapp"}} <2> | ||
{"@timestamp": "2016-05-23T08:05:34.853Z", "message": "hello app"} <3> | ||
{"_metadata": {"data_stream.dataset": "my_other_app"}} <4> | ||
{"@timestamp": "2016-05-23T08:05:34.853Z", "message": "other app"} <5> | ||
------------------------------------------------------------ | ||
|
||
<1> Provides global metadata that applies to all log events in the request via a global metadata event. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I understand the logic but not sure if this is intuitive. Maybe instead call it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fair point |
||
<2> Provides local metadata that gets merged into all subsequent log lines until the next local metadata object is provided. | ||
In this case, the metadata applies to the next event. | ||
<3> This log event will have the following metadata: | ||
`"service.name": "myapp"` (from the global metadata object) and | ||
`"data_stream.dataset": "myapp"` (from the first local metadata object) | ||
<4> Provides local metadata that invalidates the previous local metadata. | ||
It gets merged into all subsequent log lines until the next local metadata object is provided. | ||
In this case, the metadata applies to the last event. | ||
<5> This log event will have the following metadata: | ||
`"service.name": "myapp"` (from the global metadata object) and | ||
`"data_stream.dataset": "my_other_app"` (from the second local metadata object) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I assume if a user selects directly a
dataset
, only has permissions there and no routing happens, things would just work. The broader permissions are only needed for routing.