From 77f09e22ba0ceec52e124c7e92574f883f4e9fda Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Wed, 26 Feb 2020 14:11:39 -0700 Subject: [PATCH] [Filebeat] Check expand_event_list_from_field before checking content-type (#16441) (#16626) * Check expand_event_list_from_field before checking content-type (cherry picked from commit 4bb8fa33034f5eeae53ed18f811bf64f237a393a) --- CHANGELOG.next.asciidoc | 1 + .../docs/inputs/input-aws-s3.asciidoc | 6 ++++- x-pack/filebeat/input/s3/input.go | 23 ++++++++++--------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 31c8a2ace922..b28c63aaf63c 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -106,6 +106,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Fix mapping error for cloudtrail additionalEventData field {pull}16088[16088] - Fix a connection error in httpjson input. {pull}16123[16123] - Improve `elasticsearch/audit` fileset to handle timestamps correctly. {pull}15942[15942] +- Fix s3 input with cloudtrail fileset reading json file. {issue}16374[16374] {pull}16441[16441] - Rewrite azure filebeat dashboards, due to changes in kibana. {pull}16466[16466] - Adding the var definitions in azure manifest files, fix for errors when executing command setup. {issue}16270[16270] {pull}16468[16468] - Fix merging of fileset inputs to replace paths and append processors. {pull}16450{16450} diff --git a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc index 6715b8547476..fb84c486a429 100644 --- a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc @@ -59,7 +59,11 @@ If the fileset using this input expects to receive multiple messages bundled under a specific field then the config option expand_event_list_from_field value can be assigned the name of the field. 
This setting will be able to split the messages under the group value into separate events. For example, CloudTrail logs -are in JSON format and events are found under the JSON object "Records": +are in JSON format and events are found under the JSON object "Records". + +Note: When the `expand_event_list_from_field` parameter is given in the config, the s3 +input will assume the logs are in JSON format and decode them as JSON. The content +type will not be checked. [float] ==== `api_timeout` diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index c038797a807c..9ceba051bf9c 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -427,17 +427,6 @@ func (p *s3Input) createEventsFromS3Info(svc s3iface.ClientAPI, info s3Info, s3C defer resp.Body.Close() reader := bufio.NewReader(resp.Body) - // Check content-type - if (resp.ContentType != nil && *resp.ContentType == "application/x-gzip") || strings.HasSuffix(info.key, ".gz") { - gzipReader, err := gzip.NewReader(resp.Body) - if err != nil { - err = errors.Wrap(err, "gzip.NewReader failed") - p.logger.Error(err) - return err - } - reader = bufio.NewReader(gzipReader) - gzipReader.Close() - } // Decode JSON documents when expand_event_list_from_field is given in config if p.config.ExpandEventListFromField != "" { @@ -451,6 +440,18 @@ func (p *s3Input) createEventsFromS3Info(svc s3iface.ClientAPI, info s3Info, s3C return nil } + // Check content-type + if (resp.ContentType != nil && *resp.ContentType == "application/x-gzip") || strings.HasSuffix(info.key, ".gz") { + gzipReader, err := gzip.NewReader(resp.Body) + if err != nil { + err = errors.Wrap(err, "gzip.NewReader failed") + p.logger.Error(err) + return err + } + reader = bufio.NewReader(gzipReader) + gzipReader.Close() + } + // handle s3 objects that are not json content-type offset := 0 for {