diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 2ba1c3670d4e..61ce175cf946 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -742,6 +742,7 @@ from being added to events by default. {pull}18159[18159] - Improve panw ECS url fields mapping. {pull}22481[22481] - Improve Nats filebeat dashboard. {pull}22726[22726] - Add support for UNIX datagram sockets in `unix` input. {issues}18632[18632] {pull}22699[22699] +- Add new httpjson input features and mark old config ones for deprecation {pull}22320[22320] *Heartbeat* diff --git a/x-pack/filebeat/docs/inputs/images/input-httpjson-lifecycle.png b/x-pack/filebeat/docs/inputs/images/input-httpjson-lifecycle.png new file mode 100644 index 000000000000..b60d80fbb8c0 Binary files /dev/null and b/x-pack/filebeat/docs/inputs/images/input-httpjson-lifecycle.png differ diff --git a/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc b/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc index 24e673d09e6d..6cca3102ae05 100644 --- a/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc @@ -13,20 +13,28 @@ beta[] Use the `httpjson` input to read messages from an HTTP API with JSON payloads. -For example, this input is used to retrieve MISP threat indicators in the -Filebeat <> module. - -This input supports retrieval at a configurable interval and pagination. +This input supports: + +* Auth +** Basic +** OAuth2 +* Retrieval at a configurable interval +* Pagination +* Retries +* Rate limiting +* Request transformations +* Response transformations Example configurations: ["source","yaml",subs="attributes"] ---- -{beatname_lc}.inputs: +filebeat.inputs: # Fetch your public IP every minute. 
- type: httpjson - url: https://api.ipify.org/?format=json + config_version: 2 interval: 1m + request.url: https://api.ipify.org/?format=json processors: - decode_json_fields fields: [message] @@ -35,42 +43,161 @@ Example configurations: ["source","yaml",subs="attributes"] ---- -{beatname_lc}.inputs: +filebeat.inputs: - type: httpjson - url: http://localhost:9200/_search?scroll=5m - http_method: POST - json_objects_array: hits.hits - pagination: - extra_body_content: - scroll: 5m - id_field: _scroll_id - req_field: scroll_id - url: http://localhost:9200/_search/scroll + config_version: 2 + request.url: http://localhost:9200/_search?scroll=5m + request.method: POST + response.split: + target: body.hits.hits + response.pagination: + - set: + target: url.value + value: http://localhost:9200/_search/scroll + - set: + target: .url.params.scroll_id + value: '{{.last_response.body._scroll_id}}' + - set: + target: .body.scroll + value: 5m ---- -Additionally, it supports authentication via HTTP Headers, API key or oauth2. +Additionally, it supports authentication via Basic auth, HTTP Headers or oauth2. Example configurations with authentication: ["source","yaml",subs="attributes"] ---- -{beatname_lc}.inputs: +filebeat.inputs: - type: httpjson - http_headers: - Authorization: 'Basic aGVsbG86d29ybGQ=' - url: http://localhost + config_version: 2 + request.url: http://localhost + request.transforms: + - set: + target: header.Authorization + value: 'Basic aGVsbG86d29ybGQ=' ---- ["source","yaml",subs="attributes"] ---- -{beatname_lc}.inputs: +filebeat.inputs: - type: httpjson - oauth2: + config_version: 2 + auth.oauth2: client.id: 12345678901234567890abcdef client.secret: abcdef12345678901234567890 token_url: http://localhost/oauth2/token - url: http://localhost + request.url: http://localhost +---- + +[[input-state]] +==== Input state + +The `httpjson` input keeps a runtime state between requests. This state can be accessed by some configuration options and transforms. 
+ +The state has the following elements: + +- `last_response.url.value`: The full URL with params and fragments from the last request with a successful response. +- `last_response.url.params`: A map containing the params from the URL in `last_response.url.value`. +- `last_response.header`: A map containing the headers from the last successful response. +- `last_response.body`: A map containing the parsed JSON body from the last successful response. This is the response as it comes from the remote server. +- `last_response.page`: A number indicating the page number of the last response. +- `last_event`: A map representing the last event sent to the output (result from applying transforms to `last_response.body`). +- `url.value`: The full URL with params and fragments. +- `url.params`: A map containing the URL params. +- `header`: A map containing the headers. References request headers when used in <<request-transforms>> or <<response-pagination>> configuration sections, and to the last response headers when used in <<response-transforms>>, <<cursor>> or <<response-split>> configuration sections. +- `body`: A map containing the body. References request body when used in <<request-transforms>> configuration section, and to the last response body when used in <<response-transforms>>, <<cursor>> or <<response-split>> configuration sections. +- `cursor`: A map containing any data the user configured to be stored between restarts (See <<cursor>>). + +All of the mentioned objects are only stored at runtime, except `cursor`, which has values that are persisted between restarts. + +[[transforms]] +==== Transforms + +A transform is an action that lets the user modify the <<input-state,input state>>. Depending on where the transform is defined, it will have access for reading or writing different elements of the <<input-state,state>>. + +The access limitations are described in the corresponding configuration sections. + +[float] +==== `append` + +Appends a value to a list. If the field does not exist, the first entry will be a scalar value, and subsequent additions will convert the value to a list. 
+ +["source","yaml",subs="attributes"] +---- +- append: + target: body.foo.bar + value: '{{.cursor.baz}}' + default: "a default value" +---- + +- `target` defines the destination field where the value is stored. +- `value` defines the value that will be stored and it is a <<value-templates,value template>>. +- `default` defines the fallback value whenever `value` is empty or the template parsing fails. Default templates do not have access to any state, only to functions. + +[float] +==== `delete` + +Deletes the target field. + +["source","yaml",subs="attributes"] ---- +- delete: + target: body.foo.bar +---- + +- `target` defines the destination field to delete. If `target` is a list and not a single element, the complete list will be deleted. + +[float] +==== `set` + +Sets a value. + +["source","yaml",subs="attributes"] +---- +- set: + target: body.foo.bar + value: '{{.cursor.baz}}' + default: "a default value" +---- + +- `target` defines the destination field where the value is stored. +- `value` defines the value that will be stored and it is a <<value-templates,value template>>. +- `default` defines the fallback value whenever `value` is empty or the template parsing fails. Default templates do not have access to any state, only to functions. + +[[value-templates]] +==== Value templates + +Some configuration options and transforms can use value templates. Value templates are Go templates with access to the input state and to some built-in functions. + +To see which <<input-state,state elements>> and operations are available, see the documentation for the option or <<transforms,transform>> where you want to use a value template. + +A value template looks like: + +["source","yaml",subs="attributes"] +---- +- set: + target: body.foo.bar + value: '{{.cursor.baz}} more data' + default: "a default value" +---- + +The content inside the curly braces `{{` `}}` is evaluated. For more information on Go templates please refer to https://golang.org/pkg/text/template[the Go docs]. 
+ +Some built-in helper functions are provided to work with the input state inside value templates: + +- `parseDuration`: parses duration strings and returns `time.Duration`. Example: `{{parseDuration "1h"}}`. +- `now`: returns the current `time.Time` object in UTC. Optionally, it can receive a `time.Duration` as a parameter. Example: `{{now (parseDuration "-1h")}}` returns the time at 1 hour before now. +- `parseTimestamp`: parses a timestamp in seconds and returns a `time.Time` in UTC. Example: `{{parseTimestamp 1604582732}}` returns `2020-11-05 13:25:32 +0000 UTC`. +- `parseTimestampMilli`: parses a timestamp in milliseconds and returns a `time.Time` in UTC. Example: `{{parseTimestampMilli 1604582732000}}` returns `2020-11-05 13:25:32 +0000 UTC`. +- `parseTimestampNano`: parses a timestamp in nanoseconds and returns a `time.Time` in UTC. Example: `{{parseTimestampNano 1604582732000000000}}` returns `2020-11-05 13:25:32 +0000 UTC`. +- `parseDate`: parses a date string and returns a `time.Time` in UTC. By default the expected layout is `RFC3339` but optionally can accept any of the Golang predefined layouts or a custom one. Example: `{{ parseDate "2020-11-05T12:25:32Z" }}`, `{{ parseDate "2020-11-05T12:25:32.1234567Z" "RFC3339Nano" }}`, `{{ (parseDate "Thu Nov 5 12:25:32 +0000 2020" "Mon Jan _2 15:04:05 -0700 2006").UTC }}`. +- `formatDate`: formats a `time.Time`. By default the format layout is `RFC3339` but optionally can accept any of the Golang predefined layouts or a custom one. It will default to UTC timezone when formatting, but you can specify a different timezone. If the timezone is incorrect, it will default to UTC. Example: `{{ formatDate (now) "UnixDate" }}`, `{{ formatDate (now) "UnixDate" "America/New_York" }}`. +- `getRFC5988Link`: extracts a specific relation from a list of https://tools.ietf.org/html/rfc5988[RFC5988] links. It is useful when parsing header values for pagination. Example: `{{ getRFC5988Link "next" .last_response.header.Link }}`. 
+- `toInt`: converts a string to an integer. Returns 0 if the conversion fails. +- `add`: adds a list of integers and returns their sum. + +In addition to the provided functions, any of the native functions for `time.Time` and `http.Header` types can be used on the corresponding objects. Examples: `{{(now).Day}}`, `{{.last_response.header.Get "key"}}` ==== Configuration options @@ -78,49 +205,160 @@ The `httpjson` input supports the following configuration options plus the <<{beatname_lc}-input-{type}-common-options>> described later. [float] -==== `api_key` +==== `config_version` -API key to access the HTTP API. When set, this adds an `Authorization` header to -the HTTP request with this as the value. +Defines the configuration version. Current supported versions are: `1` and `2`. Default: `1`. + +NOTE: This setting defaults to `1` to avoid breaking current configurations. V1 configuration is deprecated and will be unsupported in future releases. Any new configuration should use `config_version: 2`. [float] -==== `http_client_timeout` +==== `interval` -Duration before declaring that the HTTP client connection has timed out. -Defaults to `60s`. Valid time units are `ns`, `us`, `ms`, `s` (default), `m`, -`h`. +Duration between repeated requests. It may make additional pagination requests in response to the initial request if pagination is enabled. Default: `60s`. [float] -==== `http_headers` +==== `auth.basic.enabled` + +When set to `false`, disables the basic auth configuration. Default: `true`. + +NOTE: Basic auth settings are disabled if either `enabled` is set to `false` or +the `auth.basic` section is missing. + +[float] +==== `auth.basic.user` + +The user to authenticate with. + +[float] +==== `auth.basic.password` + +The password to use. + +[float] +==== `auth.oauth2.enabled` + +When set to `false`, disables the oauth2 configuration. Default: `true`. 
+ +NOTE: OAuth2 settings are disabled if either `enabled` is set to `false` or +the `auth.oauth2` section is missing. + +[float] +==== `auth.oauth2.provider` + +Used to configure supported oauth2 providers. +Each supported provider will require specific settings. It is not set by default. +Supported providers are: `azure`, `google`. + +[float] +==== `auth.oauth2.client.id` + +The client ID used as part of the authentication flow. It is always required +except if using `google` as provider. Required for providers: `default`, `azure`. + +[float] +==== `auth.oauth2.client.secret` + +The client secret used as part of the authentication flow. It is always required +except if using `google` as provider. Required for providers: `default`, `azure`. + +[float] +==== `auth.oauth2.scopes` -Additional HTTP headers to set in the requests. The default value is `null` -(no additional headers). +A list of scopes that will be requested during the oauth2 flow. +It is optional for all providers. + +[float] +==== `auth.oauth2.token_url` + +The endpoint that will be used to generate the tokens during the oauth2 flow. It is required if no provider is specified. + +NOTE: For `azure` provider either `token_url` or `azure.tenant_id` is required. + +[float] +==== `auth.oauth2.endpoint_params` + +Set of values that will be sent on each request to the `token_url`. Each param key can have multiple values. +Can be set for all providers except `google`. ["source","yaml",subs="attributes"] ---- - type: httpjson - http_headers: - Authorization: 'Basic aGVsbG86d29ybGQ=' + config_version: 2 + auth.oauth2: + endpoint_params: + Param1: + - ValueA + - ValueB + Param2: + - Value ---- [float] -==== `http_method` +==== `auth.oauth2.azure.tenant_id` + +Used for authentication when using `azure` provider. +Since it is used in the process to generate the `token_url`, it can't be used in +combination with it. It is not required. -HTTP method to use when making requests. `GET` or `POST` are the options. 
-Defaults to `GET`. +For information about where to find it, you can refer to +https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal. [float] -==== `http_request_body` +==== `auth.oauth2.azure.resource` + +The accessed WebAPI resource when using `azure` provider. +It is not required. + +[float] +==== `auth.oauth2.google.credentials_file` + +The credentials file for Google. + +NOTE: Only one of the credentials settings can be set at once. If none is provided, loading +default credentials from the environment will be attempted via ADC. For more information about +how to provide Google credentials, please refer to https://cloud.google.com/docs/authentication. + +[float] +==== `auth.oauth2.google.credentials_json` + +Your credentials information as raw JSON. + +NOTE: Only one of the credentials settings can be set at once. If none is provided, loading +default credentials from the environment will be attempted via ADC. For more information about +how to provide Google credentials, please refer to https://cloud.google.com/docs/authentication. + +[float] +==== `auth.oauth2.google.jwt_file` + +The JWT Account Key file for Google. + +NOTE: Only one of the credentials settings can be set at once. If none is provided, loading +default credentials from the environment will be attempted via ADC. For more information about +how to provide Google credentials, please refer to https://cloud.google.com/docs/authentication. + +[float] +==== `request.url` + +The URL of the HTTP API. Required. + +[float] +==== `request.method` + +HTTP method to use when making requests. `GET` or `POST` are the options. Default: `GET`. + +[float] +==== `request.body` An optional HTTP POST body. The configuration value must be an object, and it -will be encoded to JSON. This is only valid when `http_method` is `POST`. +will be encoded to JSON. This is only valid when `request.method` is `POST`. Defaults to `null` (no HTTP body). 
["source","yaml",subs="attributes"] ---- - type: httpjson - http_method: POST - http_request_body: + config_version: 2 + request.method: POST + request.body: query: bool: filter: @@ -129,334 +367,540 @@ Defaults to `null` (no HTTP body). ---- [float] -==== `interval` +==== `request.timeout` -Duration between repeated requests. By default, the interval is `0` which means -it performs a single request then stops. It may make additional pagination -requests in response to the initial request if pagination is enabled. +Duration before declaring that the HTTP client connection has timed out. Valid time units are `ns`, `us`, `ms`, `s`, `m`, `h`. Default: `30s`. [float] -==== `json_objects_array` +==== `request.ssl` + +This specifies SSL/TLS configuration. If the ssl section is missing, the host's +CAs are used for HTTPS connections. See <> for more +information. + +[float] +==== `request.retry.max_attempts` + +The maximum number of retries for the HTTP client. Default: `5`. + +[float] +==== `request.retry.wait_min` + +The minimum time to wait before a retry is attempted. Default: `1s`. + +[float] +==== `request.retry.wait_max` + +The maximum time to wait before a retry is attempted. Default: `60s`. + +[float] +==== `request.redirect.forward_headers` + +When set to `true` request headers are forwarded in case of a redirect. Default: `false`. + +[float] +==== `request.redirect.headers_ban_list` + +When `redirect.forward_headers` is set to `true`, all headers __except__ the ones defined in this list will be forwarded. Default: `[]`. + +[float] +==== `request.redirect.max_redirects` + +The maximum number of redirects to follow for a request. Default: `10`. + +[[request-rate-limit]] +[float] +==== `request.rate_limit.limit` + +The value of the response that specifies the total limit. It is defined with a Go template value. 
Can read state from: [`.last_response.header`] + +[float] +==== `request.rate_limit.remaining` + +The value of the response that specifies the remaining quota of the rate limit. It is defined with a Go template value. Can read state from: [`.last_response.header`] + +[float] +==== `request.rate_limit.reset` + +The value of the response that specifies the epoch time when the rate limit will reset. It is defined with a Go template value. Can read state from: [`.last_response.header`] + +[[request-transforms]] +[float] +==== `request.transforms` + +List of transforms to apply to the request before each execution. + +Available transforms for request: [`append`, `delete`, `set`]. -If the response body contains a JSON object containing an array then this option -specifies the key containing that array. Each object in that array will generate -an event. This example response contains an array called `events` that we want -to index. +Can read state from: [`.last_response.*`, `.last_event.*`, `.cursor.*`]. +Can write state to: [`header.*`, `url.params.*`, `body.*`]. + +["source","yaml",subs="attributes"] +---- +filebeat.inputs: +- type: httpjson + config_version: 2 + request.url: http://localhost:9200/_search?scroll=5m + request.method: POST + request.transforms: + - set: + target: body.from + value: '{{now (parseDuration "-1h")}}' +---- + +[[response-transforms]] +[float] +==== `response.transforms` + +List of transforms to apply to the response once it is received. + +Available transforms for response: [`append`, `delete`, `set`]. + +Can read state from: [`.last_response.*`, `.last_event.*`, `.cursor.*`, `.header.*`, `.url.*`]. + +Can write state to: [`body.*`]. 
+ +["source","yaml",subs="attributes"] +---- +filebeat.inputs: +- type: httpjson + config_version: 2 + request.url: http://localhost:9200/_search?scroll=5m + request.method: POST + response.transforms: + - delete: + target: body.very_confidential + response.split: + target: .body.hits.hits + response.pagination: + - set: + target: url.value + value: http://localhost:9200/_search/scroll + - set: + target: .url.params.scroll_id + value: '{{.last_response.body._scroll_id}}' + - set: + target: .body.scroll + value: 5m +---- + +[[response-split]] +[float] +==== `response.split` + +Split operation to apply to the response once it is received. A split can convert a map or an array into multiple events. + +[float] +==== `response.split[].target` + +Defines the target field upon which the split operation will be performed. + +[float] +==== `response.split[].type` + +Defines the field type of the target. Allowed values: `array`, `map`. Default: `array`. + +[float] +==== `response.split[].transforms` + +A set of transforms can be defined. This list will be applied after `response.transforms` and after the object has been modified based on `response.split[].keep_parent` and `response.split[].key_field`. + +Available transforms for response: [`append`, `delete`, `set`]. + +Can read state from: [`.last_response.*`, `.last_event.*`, `.cursor.*`, `.header.*`, `.url.*`]. + +Can write state to: [`body.*`]. + +NOTE: in this context, `body.*` will be the result of all the previous transformations. + +[float] +==== `response.split[].keep_parent` + +If set to true, the fields from the parent document (at the same level as `target`) will be kept. Otherwise a new document will be created using `target` as the root. Default: `false`. + +[float] +==== `response.split[].key_field` + +Valid when used with `type: map`. When not empty, defines a new field where the original key value will be stored. + +[float] +==== `response.split[].split` + +Nested split operation. 
Split operations can be nested at will. An event won't be created until the deepest split operation is applied. + +[[response-pagination]] +[float] +==== `response.pagination` + +List of transforms that will be applied to the response to every new page request. All the transforms from `request.transforms` will be executed and then `response.pagination` will be added to modify the next request as needed. For subsequent responses, the usual `response.transforms` and `response.split` will be executed normally. + +Available transforms for pagination: [`append`, `delete`, `set`]. + +Can read state from: [`.last_response.*`, `.last_event.*`, `.cursor.*`]. + +Can write state to: [`body.*`, `header.*`, `url.*`]. + +Examples using split: + +- We have a response with two nested arrays, and we want a document for each of the elements of the inner array: + ++ ["source","json",subs="attributes"] ---- { - "time": "2020-06-02 23:22:32 UTC", - "events": [ + "this": "is kept", + "alerts": [ { - "timestamp": "2020-05-02 11:10:03 UTC", - "event": { - "category": "authorization" - }, - "user": { - "name": "fflintstone" - } + "this_is": "also kept", + "entities": [ + { + "something": "something" + }, + { + "else": "else" + } + ] }, { - "timestamp": "2020-05-05 13:03:11 UTC", - "event": { - "category": "authorization" - }, - "user": { - "name": "brubble" - } + "this_is": "also kept 2", + "entities": [ + { + "something": "something 2" + }, + { + "else": "else 2" + } + ] } ] } ---- -The config needs to specify `events` as the `json_objects_array` value. 
++ +The config will look like: ++ ["source","yaml",subs="attributes"] ---- +filebeat.inputs: - type: httpjson - json_objects_array: events + config_version: 2 + interval: 1m + request.url: https://example.com + response.split: + target: body.alerts + type: array + keep_parent: true + split: + # paths in nested splits need to represent the state of body, not only their current level of nesting + target: body.alerts.entities + type: array + keep_parent: true ---- -[float] -==== `split_events_by` ++ +This will output: + ++ +["source","json",subs="attributes"] +---- +[ + { + "this": "is kept", + "alerts": { + "this_is": "also kept", + "entities": { + "something": "something" + } + } + }, + { + "this": "is kept", + "alerts": { + "this_is": "also kept", + "entities": { + "else": "else" + } + } + }, + { + "this": "is kept", + "alerts": { + "this_is": "also kept 2", + "entities": { + "something": "something 2" + } + } + }, + { + "this": "is kept", + "alerts": { + "this_is": "also kept 2", + "entities": { + "else": "else 2" + } + } + } +] +---- -If the response body contains a JSON object containing an array then this option -specifies the key containing that array. Each object in that array will generate -an event, but will maintain the common fields of the document as well. 
+- We have a response with an array with two objects, and we want a document for each of the object keys while keeping the keys values: ++ ["source","json",subs="attributes"] ---- { - "time": "2020-06-02 23:22:32 UTC", - "user": "Bob", - "events": [ + "this": "is not kept", + "alerts": [ { - "timestamp": "2020-05-02 11:10:03 UTC", - "event": { - "category": "authorization" + "this_is": "kept", + "entities": { + "id1": { + "something": "something" + } } }, { - "timestamp": "2020-05-05 13:03:11 UTC", - "event": { - "category": "authorization" + "this_is": "kept 2", + "entities": { + "id2": { + "something": "something 2" + } } } ] } ---- -The config needs to specify `events` as the `split_events_by` value. ++ +The config will look like: ++ ["source","yaml",subs="attributes"] ---- +filebeat.inputs: - type: httpjson - split_events_by: events + config_version: 2 + interval: 1m + request.url: https://example.com + response.split: + target: body.alerts + type: array + keep_parent: false + split: + # this time alerts will not exist because previous keep_parent is false + target: body.entities + type: map + keep_parent: true + key_field: id ---- -And will output the following events: ++ +This will output: ++ ["source","json",subs="attributes"] ---- [ { - "time": "2020-06-02 23:22:32 UTC", - "user": "Bob", - "events": { - "timestamp": "2020-05-02 11:10:03 UTC", - "event": { - "category": "authorization" - } + "this_is": "kept", + "entities": { + "id": "id1", + "something": "something" } }, { - "time": "2020-06-02 23:22:32 UTC", - "user": "Bob", - "events": { - "timestamp": "2020-05-05 13:03:11 UTC", - "event": { - "category": "authorization" - } + "this_is": "kept 2", + "entities": { + "id": "id2", + "something": "something 2" } } ] ---- -It can be used in combination with `json_objects_array`, which will look for the field inside each element. - -[float] -==== `no_http_body` - -Force HTTP requests to be sent with an empty HTTP body. Defaults to `false`. 
-This option cannot be used with `http_request_body`, -`pagination.extra_body_content`, or `pagination.req_field`. - -[float] -==== `pagination.enabled` - -The `enabled` setting can be used to disable the pagination configuration by -setting it to `false`. The default value is `true`. - -NOTE: Pagination settings are disabled if either `enabled` is set to `false` or -the `pagination` section is missing. +- We have a response with an array with two objects, and we want a document for each of the object keys while applying a transform to each: -[float] -==== `pagination.extra_body_content` ++ +["source","json",subs="attributes"] +---- +{ + "this": "is not kept", + "alerts": [ + { + "this_is": "also not kept", + "entities": { + "id1": { + "something": "something" + } + } + }, + { + "this_is": "also not kept", + "entities": { + "id2": { + "something": "something 2" + } + } + } + ] +} +---- -An object containing additional fields that should be included in the pagination -request body. Defaults to `null`. ++ +The config will look like: ++ ["source","yaml",subs="attributes"] ---- +filebeat.inputs: - type: httpjson - pagination.extra_body_content: - max_items: 500 + config_version: 2 + interval: 1m + request.url: https://example.com + response.split: + target: body.alerts + type: array + split: + transforms: + - set: + target: body.new + value: will be added to each + target: body.entities + type: map ---- -[float] -==== `pagination.header.field_name` - -The name of the HTTP header in the response that is used for pagination control. -The header value will be extracted from the response and used to make the next -pagination response. `pagination.header.regex_pattern` can be used to select -a subset of the value. - -[float] -==== `pagination.header.regex_pattern` ++ +This will output: -The regular expression pattern to use for retrieving the pagination information -from the HTTP header field specified above. The first match becomes as the -value. 
- -[float] -==== `pagination.id_field` - -The name of a field in the JSON response body to use as the pagination ID. -The value will be included in the next pagination request under the key -specified by the `pagination.req_field` value. - -[float] -==== `pagination.req_field` - -The name of the field to include in the pagination JSON request body containing -the pagination ID defined by the `pagination.id_field` field. - -[float] -==== `pagination.url` - -This specifies the URL for sending pagination requests. Defaults to the `url` -value. This is only needed when the pagination requests need to be routed to -a different URL. - -[float] -==== `rate_limit.limit` - -This specifies the field in the HTTP header of the response that specifies the -total limit. - -[float] -==== `rate_limit.remaining` - -This specifies the field in the HTTP header of the response that specifies the -remaining quota of the rate limit. ++ +["source","json",subs="attributes"] +---- +[ + { + "something": "something", + "new": "will be added to each" + }, + { + "something": "something 2", + "new": "will be added to each" + } +] +---- +[[cursor]] [float] -==== `rate_limit.reset` - -This specifies the field in the HTTP Header of the response that specifies the -epoch time when the rate limit will reset. +==== `cursor` -[float] -==== `retry.max_attempts` +Cursor is a list of key value objects where arbitrary values are defined. The values are interpreted as <<value-templates,value templates>> and a default template can be set. Cursor state is kept between input restarts and updated once all the events for a request are published. -This specifies the maximum number of retries for the retryable HTTP client. Default: 5. +Can read state from: [`.last_response.*`, `.last_event.*`]. -[float] -==== `retry.wait_min` +NOTE: Default templates do not have access to any state, only to functions. -This specifies the minimum time to wait before a retry is attempted. Default: 1s. 
+["source","yaml",subs="attributes"] +---- +filebeat.inputs: +- type: httpjson + config_version: 2 + interval: 1m + request.url: https://api.ipify.org/?format=json + response.transforms: + - set: + target: body.last_requested_at + value: '{{.cursor.last_requested_at}}' + default: "{{now}}" + cursor: + last_requested_at: + value: '{{now}}' + processors: + - decode_json_fields + fields: [message] + target: json +---- [float] -==== `retry.wait_max` +==== `api_key` -This specifies the maximum time to wait before a retry is attempted. Default: 60s. +Deprecated, use `request.transforms`. [float] -==== `ssl` +==== `http_client_timeout` -This specifies SSL/TLS configuration. If the ssl section is missing, the host's -CAs are used for HTTPS connections. See <> for more -information. +Deprecated, use `request.timeout`. [float] -==== `url` +==== `http_headers` -The URL of the HTTP API. Required. +Deprecated, use `request.transforms`. [float] -==== `oauth2.enabled` - -The `enabled` setting can be used to disable the oauth2 configuration by -setting it to `false`. The default value is `true`. +==== `http_method` -NOTE: OAuth2 settings are disabled if either `enabled` is set to `false` or -the `oauth2` section is missing. +Deprecated, use `request.method`. [float] -==== `oauth2.provider` +==== `http_request_body` -The `provider` setting can be used to configure supported oauth2 providers. -Each supported provider will require specific settings. It is not set by default. -Supported providers are: `azure`, `google`. +Deprecated, use `request.body`. [float] -==== `oauth2.client.id` +==== `json_objects_array` -The `client.id` setting is used as part of the authentication flow. It is always required -except if using `google` as provider. Required for providers: `default`, `azure`. +Deprecated, use `response.split`. [float] -==== `oauth2.client.secret` +==== `split_events_by` -The `client.secret` setting is used as part of the authentication flow. 
It is always required -except if using `google` as provider. Required for providers: `default`, `azure`. +Deprecated, use `response.split`. [float] -==== `oauth2.scopes` +==== `no_http_body` -The `scopes` setting defines a list of scopes that will be requested during the oauth2 flow. -It is optional for all providers. +Deprecated. [float] -==== `oauth2.token_url` +==== `pagination.*` -The `token_url` setting specifies the endpoint that will be used to generate the -tokens during the oauth2 flow. It is required if no provider is specified. - -NOTE: For `azure` provider either `token_url` or `azure.tenant_id` is required. +Deprecated, use `response.pagination`. [float] -==== `oauth2.endpoint_params` +==== `rate_limit.*` -The `endpoint_params` setting specifies a set of values that will be sent on each -request to the `token_url`. Each param key can have multiple values. -Can be set for all providers except `google`. - -["source","yaml",subs="attributes"] ----- -- type: httpjson - oauth2: - endpoint_params: - Param1: - - ValueA - - ValueB - Param2: - - Value ----- +Deprecated, use `request.rate_limit.*`. [float] -==== `oauth2.azure.tenant_id` +==== `retry.*` -The `azure.tenant_id` is used for authentication when using `azure` provider. -Since it is used in the process to generate the `token_url`, it can't be used in -combination with it. It is not required. - -For information about where to find it, you can refer to -https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal. +Deprecated, use `request.retry.*`. [float] -==== `oauth2.azure.resource` +==== `ssl` -The `azure.resource` is used to identify the accessed WebAPI resource when using `azure` provider. -It is not required. +Deprecated, use `request.ssl`. [float] -==== `oauth2.google.credentials_file` - -The `google.credentials_file` setting specifies the credentials file for Google. +==== `url` -NOTE: Only one of the credentials settings can be set at once. 
If none is provided, loading -default credentials from the environment will be attempted via ADC. For more information about -how to provide Google credentials, please refer to https://cloud.google.com/docs/authentication. +Deprecated, use `request.url`. [float] -==== `oauth2.google.credentials_json` +==== `oauth2.*` -The `google.credentials_json` setting allows to write your credentials information as raw JSON. +Deprecated, use `auth.oauth2.*`. -NOTE: Only one of the credentials settings can be set at once. If none is provided, loading -default credentials from the environment will be attempted via ADC. For more information about -how to provide Google credentials, please refer to https://cloud.google.com/docs/authentication. +==== Request life cycle -[float] -==== `oauth2.google.jwt_file` - -The `google.jwt_file` setting specifies the JWT Account Key file for Google. +image:images/input-httpjson-lifecycle.png[Request lifecycle] -NOTE: Only one of the credentials settings can be set at once. If none is provided, loading -default credentials from the environment will be attempted via ADC. For more information about -how to provide Google credentials, please refer to https://cloud.google.com/docs/authentication. +. At every defined interval a new request is created. +. The request is transformed using the configured `request.transforms`. +. The resulting transformed request is executed. +. The server responds (here is where any retry or rate limit policy takes place when configured). +. The response is transformed using the configured `response.transforms` and `response.split`. +. Each resulting event is published to the output. +. If a `response.pagination` is configured and there are more pages, a new request is created using it, otherwise the process ends until the next interval. 
[id="{beatname_lc}-input-{type}-common-options"] include::../../../../filebeat/docs/inputs/input-common-options.asciidoc[] diff --git a/x-pack/filebeat/input/httpjson/input.go b/x-pack/filebeat/input/httpjson/input.go index 5445197f5630..3d476b143ef3 100644 --- a/x-pack/filebeat/input/httpjson/input.go +++ b/x-pack/filebeat/input/httpjson/input.go @@ -15,7 +15,7 @@ import ( "github.com/hashicorp/go-retryablehttp" "go.uber.org/zap" - v2 "github.com/elastic/beats/v7/filebeat/input/v2" + inputv2 "github.com/elastic/beats/v7/filebeat/input/v2" cursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" stateless "github.com/elastic/beats/v7/filebeat/input/v2/input-stateless" "github.com/elastic/beats/v7/libbeat/beat" @@ -24,6 +24,7 @@ import ( "github.com/elastic/beats/v7/libbeat/common/useragent" "github.com/elastic/beats/v7/libbeat/feature" "github.com/elastic/beats/v7/libbeat/logp" + v2 "github.com/elastic/beats/v7/x-pack/filebeat/input/httpjson/internal/v2" "github.com/elastic/go-concert/ctxtool" "github.com/elastic/go-concert/timed" ) @@ -65,14 +66,15 @@ func (log *retryLogger) Warn(format string, args ...interface{}) { log.log.Warnf(format, args...) 
} -func Plugin(log *logp.Logger, store cursor.StateStore) v2.Plugin { +func Plugin(log *logp.Logger, store cursor.StateStore) inputv2.Plugin { sim := stateless.NewInputManager(statelessConfigure) - return v2.Plugin{ + return inputv2.Plugin{ Name: inputName, Stability: feature.Beta, Deprecated: false, Manager: inputManager{ - stateless: &sim, + v2inputManager: v2.NewInputManager(log, store), + stateless: &sim, cursor: &cursor.InputManager{ Logger: log, StateStore: store, @@ -117,7 +119,7 @@ func test(url *url.URL) error { } func run( - ctx v2.Context, + ctx inputv2.Context, config config, tlsConfig *tlscommon.TLSConfig, publisher cursor.Publisher, diff --git a/x-pack/filebeat/input/httpjson/input_manager.go b/x-pack/filebeat/input/httpjson/input_manager.go index 8d7e60707869..68a929fb8fb1 100644 --- a/x-pack/filebeat/input/httpjson/input_manager.go +++ b/x-pack/filebeat/input/httpjson/input_manager.go @@ -9,10 +9,12 @@ import ( "github.com/elastic/go-concert/unison" - v2 "github.com/elastic/beats/v7/filebeat/input/v2" + inputv2 "github.com/elastic/beats/v7/filebeat/input/v2" cursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" stateless "github.com/elastic/beats/v7/filebeat/input/v2/input-stateless" "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/common/cfgwarn" + v2 "github.com/elastic/beats/v7/x-pack/filebeat/input/httpjson/internal/v2" ) // inputManager wraps one stateless input manager @@ -21,21 +23,30 @@ import ( type inputManager struct { stateless *stateless.InputManager cursor *cursor.InputManager + + v2inputManager v2.InputManager } -var _ v2.InputManager = inputManager{} +var _ inputv2.InputManager = inputManager{} // Init initializes both wrapped input managers. 
-func (m inputManager) Init(grp unison.Group, mode v2.Mode) error { +func (m inputManager) Init(grp unison.Group, mode inputv2.Mode) error { return multierr.Append( - m.stateless.Init(grp, mode), - m.cursor.Init(grp, mode), + multierr.Append( + m.stateless.Init(grp, mode), + m.cursor.Init(grp, mode), + ), + m.v2inputManager.Init(grp, mode), ) } // Create creates a cursor input manager if the config has a date cursor set up, // otherwise it creates a stateless input manager. -func (m inputManager) Create(cfg *common.Config) (v2.Input, error) { +func (m inputManager) Create(cfg *common.Config) (inputv2.Input, error) { + if v, _ := cfg.String("config_version", -1); v == "2" { + return m.v2inputManager.Create(cfg) + } + cfgwarn.Deprecate("7.12", "you are using a deprecated version of httpjson config") config := newDefaultConfig() if err := cfg.Unpack(&config); err != nil { return nil, err diff --git a/x-pack/filebeat/input/httpjson/internal/v2/config.go b/x-pack/filebeat/input/httpjson/internal/v2/config.go new file mode 100644 index 000000000000..95eac252201b --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/config.go @@ -0,0 +1,53 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package v2 + +import ( + "errors" + "time" +) + +// config is the top-level configuration of the httpjson v2 input. +type config struct { + Interval time.Duration `config:"interval" validate:"required"` + Auth *authConfig `config:"auth"` + Request *requestConfig `config:"request" validate:"required"` + Response *responseConfig `config:"response"` + Cursor cursorConfig `config:"cursor"` +} + +// cursorConfig maps a cursor key to the value and default templates +// configured for it. +type cursorConfig map[string]struct { + Value *valueTpl `config:"value"` + Default *valueTpl `config:"default"` +} + +// Validate returns an error when the interval is not greater than 0. +func (c config) Validate() error { + if c.Interval <= 0 { + return errors.New("interval must be greater than 0") + } + return nil +} + +// defaultConfig returns a config holding the default values: a 1m interval and +// GET requests with a 30s timeout, retry max_attempts 5 with wait_min 1s and +// wait_max 1m, no header forwarding on redirects and at most 10 redirects. +func defaultConfig() config { + timeout := 30 * time.Second + maxAttempts := 5 + waitMin := time.Second + waitMax := time.Minute + return config{ + Interval: time.Minute, + Auth: &authConfig{}, + Request: &requestConfig{ + Timeout: &timeout, + Method: "GET", + Retry: retryConfig{ + MaxAttempts: &maxAttempts, + WaitMin: &waitMin, + WaitMax: &waitMax, + }, + RedirectForwardHeaders: false, + RedirectMaxRedirects: 10, + }, + Response: &responseConfig{}, + } +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/config_auth.go b/x-pack/filebeat/input/httpjson/internal/v2/config_auth.go new file mode 100644 index 000000000000..b9e9d3ad0da7 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/config_auth.go @@ -0,0 +1,266 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package v2 + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "net/http" + "os" + "strings" + + "golang.org/x/oauth2" + "golang.org/x/oauth2/clientcredentials" + "golang.org/x/oauth2/endpoints" + "golang.org/x/oauth2/google" +) + +// authConfig holds the settings of the supported authentication methods. +type authConfig struct { + Basic *basicAuthConfig `config:"basic"` + OAuth2 *oAuth2Config `config:"oauth2"` +} + +// Validate returns an error when basic and oauth2 auth are both enabled. +func (c authConfig) Validate() error { + if c.Basic.isEnabled() && c.OAuth2.isEnabled() { + return errors.New("only one kind of auth can be enabled") + } + return nil +} + +// basicAuthConfig holds the basic auth settings. +type basicAuthConfig struct { + Enabled *bool `config:"enabled"` + User string `config:"user"` + Password string `config:"password"` +} + +// isEnabled reports whether basic auth is configured (non-nil receiver) and +// `enabled` is either unset or true. +func (b *basicAuthConfig) isEnabled() bool { + return b != nil && (b.Enabled == nil || *b.Enabled) +} + +// Validate checks that user and password are both set when basic auth is enabled. +func (b *basicAuthConfig) Validate() error { + if !b.isEnabled() { + return nil + } + + if b.User == "" || b.Password == "" { + return errors.New("both user and password must be set") + } + + return nil +} + +// An oAuth2Provider represents a supported oauth provider. +type oAuth2Provider string + +const ( + oAuth2ProviderDefault oAuth2Provider = "" // oAuth2ProviderDefault means no specific provider is set. + oAuth2ProviderAzure oAuth2Provider = "azure" // oAuth2ProviderAzure AzureAD. + oAuth2ProviderGoogle oAuth2Provider = "google" // oAuth2ProviderGoogle Google. 
+) + +// Unpack stores the configured string as the provider, without validating it. +func (p *oAuth2Provider) Unpack(in string) error { + *p = oAuth2Provider(in) + return nil +} + +// canonical returns the provider name in lower case. +func (p oAuth2Provider) canonical() oAuth2Provider { + return oAuth2Provider(strings.ToLower(string(p))) +} + +// oAuth2Config holds the oauth2 settings, both common and provider specific. +type oAuth2Config struct { + Enabled *bool `config:"enabled"` + + // common oauth fields + ClientID string `config:"client.id"` + ClientSecret string `config:"client.secret"` + EndpointParams map[string][]string `config:"endpoint_params"` + Provider oAuth2Provider `config:"provider"` + Scopes []string `config:"scopes"` + TokenURL string `config:"token_url"` + + // google specific + GoogleCredentialsFile string `config:"google.credentials_file"` + GoogleCredentialsJSON []byte `config:"google.credentials_json"` + GoogleJWTFile string `config:"google.jwt_file"` + GoogleDelegatedAccount string `config:"google.delegated_account"` + + // microsoft azure specific + AzureTenantID string `config:"azure.tenant_id"` + AzureResource string `config:"azure.resource"` +} + +// isEnabled reports whether oauth2 is configured (non-nil receiver) and +// `enabled` is either unset or true. +func (o *oAuth2Config) isEnabled() bool { + return o != nil && (o.Enabled == nil || *o.Enabled) +} + +// client wraps the given http.Client and returns a new one that will use the oauth authentication. +func (o *oAuth2Config) client(ctx context.Context, client *http.Client) (*http.Client, error) { + ctx = context.WithValue(ctx, oauth2.HTTPClient, client) + + switch o.getProvider() { + case oAuth2ProviderAzure, oAuth2ProviderDefault: + creds := clientcredentials.Config{ + ClientID: o.ClientID, + ClientSecret: o.ClientSecret, + TokenURL: o.getTokenURL(), + Scopes: o.Scopes, + EndpointParams: o.getEndpointParams(), + } + return creds.Client(ctx), nil + case oAuth2ProviderGoogle: + if o.GoogleJWTFile != "" { + cfg, err := google.JWTConfigFromJSON(o.GoogleCredentialsJSON, o.Scopes...) 
+ if err != nil { + return nil, fmt.Errorf("oauth2 client: error loading jwt credentials: %w", err) + } + cfg.Subject = o.GoogleDelegatedAccount + return cfg.Client(ctx), nil + } + + creds, err := google.CredentialsFromJSON(ctx, o.GoogleCredentialsJSON, o.Scopes...) + if err != nil { + return nil, fmt.Errorf("oauth2 client: error loading credentials: %w", err) + } + return oauth2.NewClient(ctx, creds.TokenSource), nil + default: + return nil, errors.New("oauth2 client: unknown provider") + } +} + +// getTokenURL returns the TokenURL. +func (o *oAuth2Config) getTokenURL() string { + switch o.getProvider() { + case oAuth2ProviderAzure: + if o.TokenURL == "" { + return endpoints.AzureAD(o.AzureTenantID).TokenURL + } + } + + return o.TokenURL +} + +// getProvider returns provider in its canonical form. +func (o oAuth2Config) getProvider() oAuth2Provider { + return o.Provider.canonical() +} + +// getEndpointParams returns endpoint params with any provider ones combined. +func (o oAuth2Config) getEndpointParams() map[string][]string { + switch o.getProvider() { + case oAuth2ProviderAzure: + if o.AzureResource != "" { + if o.EndpointParams == nil { + o.EndpointParams = map[string][]string{} + } + o.EndpointParams["resource"] = []string{o.AzureResource} + } + } + + return o.EndpointParams +} + +// Validate checks if oauth2 config is valid. 
+func (o *oAuth2Config) Validate() error { + if !o.isEnabled() { + return nil + } + + switch o.getProvider() { + case oAuth2ProviderAzure: + return o.validateAzureProvider() + case oAuth2ProviderGoogle: + return o.validateGoogleProvider() + case oAuth2ProviderDefault: + if o.TokenURL == "" || o.ClientID == "" || o.ClientSecret == "" { + return errors.New("both token_url and client credentials must be provided") + } + default: + return fmt.Errorf("unknown provider %q", o.getProvider()) + } + + return nil +} + +// findDefaultGoogleCredentials will default to google.FindDefaultCredentials and will only be changed for testing purposes +var findDefaultGoogleCredentials = google.FindDefaultCredentials + +func (o *oAuth2Config) validateGoogleProvider() error { + if o.TokenURL != "" || o.ClientID != "" || o.ClientSecret != "" || + o.AzureTenantID != "" || o.AzureResource != "" || len(o.EndpointParams) > 0 { + return errors.New("none of token_url and client credentials can be used, use google.credentials_file, google.jwt_file, google.credentials_json or ADC instead") + } + + // credentials_json + if len(o.GoogleCredentialsJSON) > 0 { + if o.GoogleDelegatedAccount != "" { + return errors.New("google.delegated_account can only be provided with a jwt_file") + } + if !json.Valid(o.GoogleCredentialsJSON) { + return errors.New("google.credentials_json must be valid JSON") + } + return nil + } + + // credentials_file + if o.GoogleCredentialsFile != "" { + if o.GoogleDelegatedAccount != "" { + return errors.New("google.delegated_account can only be provided with a jwt_file") + } + return o.populateCredentialsJSONFromFile(o.GoogleCredentialsFile) + } + + // jwt_file + if o.GoogleJWTFile != "" { + return o.populateCredentialsJSONFromFile(o.GoogleJWTFile) + } + + // Application Default Credentials (ADC) + ctx := context.Background() + if creds, err := findDefaultGoogleCredentials(ctx, o.Scopes...); err == nil { + o.GoogleCredentialsJSON = creds.JSON + return nil + } + + return 
fmt.Errorf("no authentication credentials were configured or detected (ADC)") +} + +// populateCredentialsJSONFromFile reads the given file and stores its content as +// the Google credentials JSON. It fails if the file is missing, cannot be read +// or does not contain valid JSON. +func (o *oAuth2Config) populateCredentialsJSONFromFile(file string) error { + if _, err := os.Stat(file); os.IsNotExist(err) { + return fmt.Errorf("the file %q cannot be found", file) + } + + credBytes, err := ioutil.ReadFile(file) + if err != nil { + // keep the underlying cause (permissions, is a directory, ...) in the chain + return fmt.Errorf("the file %q cannot be read: %w", file, err) + } + + if !json.Valid(credBytes) { + return fmt.Errorf("the file %q does not contain valid JSON", file) + } + + o.GoogleCredentialsJSON = credBytes + + return nil +} + +// validateAzureProvider checks that exactly one of token_url and tenant_id is +// set and that both client id and client secret are provided. +func (o *oAuth2Config) validateAzureProvider() error { + if o.TokenURL == "" && o.AzureTenantID == "" { + return errors.New("at least one of token_url or tenant_id must be provided") + } + if o.TokenURL != "" && o.AzureTenantID != "" { + return errors.New("only one of token_url and tenant_id can be used") + } + if o.ClientID == "" || o.ClientSecret == "" { + return errors.New("client credentials must be provided") + } + + return nil +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/config_request.go b/x-pack/filebeat/input/httpjson/internal/v2/config_request.go new file mode 100644 index 000000000000..a76b115cfcab --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/config_request.go @@ -0,0 +1,120 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package v2 + +import ( + "errors" + "fmt" + "net/url" + "strings" + "time" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/common/transport/tlscommon" +) + +type retryConfig struct { + MaxAttempts *int `config:"max_attempts"` + WaitMin *time.Duration `config:"wait_min"` + WaitMax *time.Duration `config:"wait_max"` +} + +func (c retryConfig) Validate() error { + switch { + case c.MaxAttempts != nil && *c.MaxAttempts <= 0: + return errors.New("max_attempts must be greater than 0") + case c.WaitMin != nil && *c.WaitMin <= 0: + return errors.New("wait_min must be greater than 0") + case c.WaitMax != nil && *c.WaitMax <= 0: + return errors.New("wait_max must be greater than 0") + } + return nil +} + +func (c retryConfig) getMaxAttempts() int { + if c.MaxAttempts == nil { + return 0 + } + return *c.MaxAttempts +} + +func (c retryConfig) getWaitMin() time.Duration { + if c.WaitMin == nil { + return 0 + } + return *c.WaitMin +} + +func (c retryConfig) getWaitMax() time.Duration { + if c.WaitMax == nil { + return 0 + } + return *c.WaitMax +} + +type rateLimitConfig struct { + Limit *valueTpl `config:"limit"` + Reset *valueTpl `config:"reset"` + Remaining *valueTpl `config:"remaining"` +} + +type urlConfig struct { + *url.URL +} + +func (u *urlConfig) Unpack(in string) error { + parsed, err := url.Parse(in) + if err != nil { + return err + } + + *u = urlConfig{URL: parsed} + + return nil +} + +type requestConfig struct { + URL *urlConfig `config:"url" validate:"required"` + Method string `config:"method" validate:"required"` + Body *common.MapStr `config:"body"` + Timeout *time.Duration `config:"timeout"` + SSL *tlscommon.Config `config:"ssl"` + Retry retryConfig `config:"retry"` + RedirectForwardHeaders bool `config:"redirect.forward_headers"` + RedirectHeadersBanList []string `config:"redirect.headers_ban_list"` + RedirectMaxRedirects int `config:"redirect.max_redirects"` + RateLimit *rateLimitConfig `config:"rate_limit"` + 
Transforms transformsConfig `config:"transforms"` +} + +func (c requestConfig) getTimeout() time.Duration { + if c.Timeout == nil { + return 0 + } + return *c.Timeout +} + +func (c *requestConfig) Validate() error { + c.Method = strings.ToUpper(c.Method) + switch c.Method { + case "POST": + case "GET": + if c.Body != nil { + return errors.New("body can't be used with method: \"GET\"") + } + default: + return fmt.Errorf("unsupported method %q", c.Method) + } + + if c.Timeout != nil && *c.Timeout <= 0 { + return errors.New("timeout must be greater than 0") + } + + if _, err := newBasicTransformsFromConfig(c.Transforms, requestNamespace, nil); err != nil { + return err + } + + return nil +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/config_response.go b/x-pack/filebeat/input/httpjson/internal/v2/config_response.go new file mode 100644 index 000000000000..6b616e79d300 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/config_response.go @@ -0,0 +1,63 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package v2 + +import ( + "fmt" + "strings" +) + +// Supported values of the split `type` setting. +const ( + splitTypeArr = "array" + splitTypeMap = "map" +) + +// responseConfig holds the response transforms, pagination transforms and split settings. +type responseConfig struct { + Transforms transformsConfig `config:"transforms"` + Pagination transformsConfig `config:"pagination"` + Split *splitConfig `config:"split"` +} + +// splitConfig holds the settings of a split, which can contain a nested split. +// target is required. +type splitConfig struct { + Target string `config:"target" validate:"required"` + Type string `config:"type"` + Transforms transformsConfig `config:"transforms"` + Split *splitConfig `config:"split"` + KeepParent bool `config:"keep_parent"` + KeyField string `config:"key_field"` +} + +// Validate checks that the response and pagination transforms can be built. +func (c *responseConfig) Validate() error { + if _, err := newBasicTransformsFromConfig(c.Transforms, responseNamespace, nil); err != nil { + return err + } + if _, err := newBasicTransformsFromConfig(c.Pagination, paginationNamespace, nil); err != nil { + return err + } + return nil +} + +// Validate lower-cases the split type and checks that it is supported, that +// key_field is not set for array (or unset) split types, and that the +// transforms and the split itself can be built. +func (c *splitConfig) Validate() error { + if _, err := newBasicTransformsFromConfig(c.Transforms, responseNamespace, nil); err != nil { + return err + } + + c.Type = strings.ToLower(c.Type) + switch c.Type { + case "", splitTypeArr: + if c.KeyField != "" { + return fmt.Errorf("key_field can only be used with a %s split type", splitTypeMap) + } + case splitTypeMap: + default: + return fmt.Errorf("invalid split type: %s", c.Type) + } + + if _, err := newSplitResponse(c, nil); err != nil { + return err + } + + return nil +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/config_test.go b/x-pack/filebeat/input/httpjson/internal/v2/config_test.go new file mode 100644 index 000000000000..19693f0e727b --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/config_test.go @@ -0,0 +1,372 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package v2 + +import ( + "context" + "errors" + "os" + "testing" + + "github.com/stretchr/testify/assert" + "golang.org/x/oauth2/google" + + "github.com/elastic/beats/v7/libbeat/common" +) + +func TestProviderCanonical(t *testing.T) { + const ( + a oAuth2Provider = "gOoGle" + b oAuth2Provider = "google" + ) + + assert.Equal(t, a.canonical(), b.canonical()) +} + +func TestGetProviderIsCanonical(t *testing.T) { + const expected oAuth2Provider = "google" + + oauth2 := oAuth2Config{Provider: "GOogle"} + assert.Equal(t, expected, oauth2.getProvider()) +} + +func TestIsEnabled(t *testing.T) { + oauth2 := oAuth2Config{} + if !oauth2.isEnabled() { + t.Fatal("OAuth2 should be enabled by default") + } + + var enabled = false + oauth2.Enabled = &enabled + + assert.False(t, oauth2.isEnabled()) + + enabled = true + + assert.True(t, oauth2.isEnabled()) +} + +func TestGetTokenURL(t *testing.T) { + const expected = "http://localhost" + oauth2 := oAuth2Config{TokenURL: "http://localhost"} + assert.Equal(t, expected, oauth2.getTokenURL()) +} + +func TestGetTokenURLWithAzure(t *testing.T) { + const expectedWithoutTenantID = "http://localhost" + oauth2 := oAuth2Config{TokenURL: "http://localhost", Provider: "azure"} + + assert.Equal(t, expectedWithoutTenantID, oauth2.getTokenURL()) + + oauth2.TokenURL = "" + oauth2.AzureTenantID = "a_tenant_id" + const expectedWithTenantID = "https://login.microsoftonline.com/a_tenant_id/oauth2/v2.0/token" + + assert.Equal(t, expectedWithTenantID, oauth2.getTokenURL()) + +} + +func TestGetEndpointParams(t *testing.T) { + var expected = map[string][]string{"foo": {"bar"}} + oauth2 := oAuth2Config{EndpointParams: map[string][]string{"foo": {"bar"}}} + assert.Equal(t, expected, oauth2.getEndpointParams()) +} + +func TestGetEndpointParamsWithAzure(t *testing.T) { + var expectedWithoutResource = map[string][]string{"foo": {"bar"}} + oauth2 := oAuth2Config{Provider: "azure", EndpointParams: map[string][]string{"foo": {"bar"}}} + + assert.Equal(t, 
expectedWithoutResource, oauth2.getEndpointParams()) + + oauth2.AzureResource = "baz" + var expectedWithResource = map[string][]string{"foo": {"bar"}, "resource": {"baz"}} + + assert.Equal(t, expectedWithResource, oauth2.getEndpointParams()) +} + +func TestConfigFailsWithInvalidMethod(t *testing.T) { + m := map[string]interface{}{ + "request.method": "DELETE", + } + cfg := common.MustNewConfigFrom(m) + conf := defaultConfig() + if err := cfg.Unpack(&conf); err == nil { + t.Fatal("Configuration validation failed. http_method DELETE is not allowed.") + } +} + +func TestConfigMustFailWithInvalidURL(t *testing.T) { + m := map[string]interface{}{ + "request.url": "::invalid::", + } + cfg := common.MustNewConfigFrom(m) + conf := defaultConfig() + err := cfg.Unpack(&conf) + assert.EqualError(t, err, `parse "::invalid::": missing protocol scheme accessing 'request.url'`) +} + +func TestConfigOauth2Validation(t *testing.T) { + cases := []struct { + name string + expectedErr string + input map[string]interface{} + setup func() + teardown func() + }{ + { + name: "can't set oauth2 and basic auth together", + expectedErr: "only one kind of auth can be enabled accessing 'auth'", + input: map[string]interface{}{ + "auth.basic.user": "user", + "auth.basic.password": "pass", + "auth.oauth2": map[string]interface{}{ + "token_url": "localhost", + "client": map[string]interface{}{ + "id": "a_client_id", + "secret": "a_client_secret", + }, + }, + }, + }, + { + name: "can set oauth2 and basic auth together if oauth2 is disabled", + input: map[string]interface{}{ + "auth.basic.user": "user", + "auth.basic.password": "pass", + "auth.oauth2": map[string]interface{}{ + "enabled": false, + "token_url": "localhost", + "client": map[string]interface{}{ + "id": "a_client_id", + "secret": "a_client_secret", + }, + }, + }, + }, + { + name: "token_url and client credentials must be set", + expectedErr: "both token_url and client credentials must be provided accessing 'auth.oauth2'", + input: 
map[string]interface{}{ + "auth.oauth2": map[string]interface{}{}, + }, + }, + { + name: "must fail with an unknown provider", + expectedErr: "unknown provider \"unknown\" accessing 'auth.oauth2'", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "unknown", + }, + }, + }, + { + name: "azure must have either tenant_id or token_url", + expectedErr: "at least one of token_url or tenant_id must be provided accessing 'auth.oauth2'", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "azure", + }, + }, + }, + { + name: "azure must have only one of token_url and tenant_id", + expectedErr: "only one of token_url and tenant_id can be used accessing 'auth.oauth2'", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "azure", + "azure.tenant_id": "a_tenant_id", + "token_url": "localhost", + }, + }, + }, + { + name: "azure must have client credentials set", + expectedErr: "client credentials must be provided accessing 'auth.oauth2'", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "azure", + "azure.tenant_id": "a_tenant_id", + }, + }, + }, + { + name: "azure config is valid", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "azure", + "azure": map[string]interface{}{ + "tenant_id": "a_tenant_id", + }, + "client.id": "a_client_id", + "client.secret": "a_client_secret", + }, + }, + }, + { + name: "google can't have token_url or client credentials set", + expectedErr: "none of token_url and client credentials can be used, use google.credentials_file, google.jwt_file, google.credentials_json or ADC instead accessing 'auth.oauth2'", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "google", + "azure": map[string]interface{}{ + "tenant_id": "a_tenant_id", + }, + "client.id": "a_client_id", + "client.secret": "a_client_secret", + "token_url": "localhost", + 
}, + }, + }, + { + name: "google must fail if no ADC available", + expectedErr: "no authentication credentials were configured or detected (ADC) accessing 'auth.oauth2'", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "google", + }, + }, + setup: func() { + // we change the default function to force a failure + findDefaultGoogleCredentials = func(context.Context, ...string) (*google.Credentials, error) { + return nil, errors.New("failed") + } + }, + teardown: func() { findDefaultGoogleCredentials = google.FindDefaultCredentials }, + }, + { + name: "google must fail if credentials file not found", + expectedErr: "the file \"./wrong\" cannot be found accessing 'auth.oauth2'", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "google", + "google.credentials_file": "./wrong", + }, + }, + }, + { + name: "google must fail if ADC is wrongly set", + expectedErr: "no authentication credentials were configured or detected (ADC) accessing 'auth.oauth2'", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "google", + }, + }, + setup: func() { os.Setenv("GOOGLE_APPLICATION_CREDENTIALS", "./wrong") }, + // unset the variable again so it does not leak into the following cases + teardown: func() { os.Unsetenv("GOOGLE_APPLICATION_CREDENTIALS") }, + }, + { + name: "google must work if ADC is set up", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "google", + }, + }, + setup: func() { os.Setenv("GOOGLE_APPLICATION_CREDENTIALS", "./testdata/credentials.json") }, + // unset the variable again so it does not leak into the following cases + teardown: func() { os.Unsetenv("GOOGLE_APPLICATION_CREDENTIALS") }, + }, + { + name: "google must work if credentials_file is correct", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "google", + "google.credentials_file": "./testdata/credentials.json", + }, + }, + }, + { + name: "google must work if jwt_file is correct", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "google", + "google.jwt_file": "./testdata/credentials.json", + }, + }, + }, + { + name: "google must work if credentials_json is
correct", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "google", + "google.credentials_json": []byte(`{ + "type": "service_account", + "project_id": "foo", + "private_key_id": "x", + "client_email": "foo@bar.com", + "client_id": "0" + }`), + }, + }, + }, + { + name: "google must fail if credentials_json is not a valid JSON", + expectedErr: "google.credentials_json must be valid JSON accessing 'auth.oauth2'", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "google", + "google.credentials_json": []byte(`invalid`), + }, + }, + }, + { + name: "google must fail if the provided credentials file is not a valid JSON", + expectedErr: "the file \"./testdata/invalid_credentials.json\" does not contain valid JSON accessing 'auth.oauth2'", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "google", + "google.credentials_file": "./testdata/invalid_credentials.json", + }, + }, + }, + { + name: "google must fail if the delegated_account is set without jwt_file", + expectedErr: "google.delegated_account can only be provided with a jwt_file accessing 'auth.oauth2'", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "google", + "google.credentials_file": "./testdata/credentials.json", + "google.delegated_account": "delegated@account.com", + }, + }, + }, + { + name: "google must work with delegated_account and a valid jwt_file", + input: map[string]interface{}{ + "auth.oauth2": map[string]interface{}{ + "provider": "google", + "google.jwt_file": "./testdata/credentials.json", + "google.delegated_account": "delegated@account.com", + }, + }, + }, + } + + for _, c := range cases { + c := c + t.Run(c.name, func(t *testing.T) { + if c.setup != nil { + c.setup() + } + + if c.teardown != nil { + defer c.teardown() + } + + c.input["request.url"] = "localhost" + cfg := common.MustNewConfigFrom(c.input) + conf := defaultConfig() + err := 
cfg.Unpack(&conf) + + switch { + case c.expectedErr == "": + if err != nil { + t.Fatalf("Configuration validation failed. no error expected but got %q", err) + } + + case c.expectedErr != "": + if err == nil || err.Error() != c.expectedErr { + t.Fatalf("Configuration validation failed. expecting %q error but got %q", c.expectedErr, err) + } + } + }) + } +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/cursor.go b/x-pack/filebeat/input/httpjson/internal/v2/cursor.go new file mode 100644 index 000000000000..053cdd87bd4b --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/cursor.go @@ -0,0 +1,75 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package v2 + +import ( + inputcursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +// cursor keeps the configured cursor templates and the state computed from them. +type cursor struct { + log *logp.Logger + + cfg cursorConfig + + state common.MapStr +} + +// newCursor returns a cursor with the given config and logger and no state yet. +func newCursor(cfg cursorConfig, log *logp.Logger) *cursor { + return &cursor{cfg: cfg, log: log} +} + +// load unpacks the given stored cursor into the cursor state. It does nothing +// on a nil receiver, and only logs when there is no stored cursor or it is new. +func (c *cursor) load(cursor *inputcursor.Cursor) { + if c == nil { + // a nil receiver has no logger to write to and no state to fill + return + } + if cursor == nil || cursor.IsNew() { + c.log.Debug("new cursor: nothing loaded") + return + } + + if c.state == nil { + c.state = common.MapStr{} + } + + if err := cursor.Unpack(&c.state); err != nil { + c.log.Errorf("Reset cursor state.
Failed to read from registry: %v", err) + return + } + + c.log.Debugf("cursor loaded: %v", c.state) +} + +// update executes every configured cursor value template with trCtx and stores +// the result under its key. It does nothing on a nil receiver or without config. +func (c *cursor) update(trCtx *transformContext) { + if c == nil || c.cfg == nil { + return + } + + if c.state == nil { + c.state = common.MapStr{} + } + + for k, cfg := range c.cfg { + v := cfg.Value.Execute(trCtx, transformable{}, cfg.Default, c.log) + _, _ = c.state.Put(k, v) + c.log.Debugf("cursor.%s stored with %s", k, v) + } +} + +// clone returns a copy of the cursor state, or an empty map when there is none. +func (c *cursor) clone() common.MapStr { + if c == nil || c.state == nil { + return common.MapStr{} + } + return c.state.Clone() +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/input.go b/x-pack/filebeat/input/httpjson/internal/v2/input.go new file mode 100644 index 000000000000..8ec7d1a66794 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/input.go @@ -0,0 +1,225 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package v2 + +import ( + "context" + "encoding/json" + "fmt" + "net" + "net/http" + "net/url" + "time" + + retryablehttp "github.com/hashicorp/go-retryablehttp" + "go.uber.org/zap" + + v2 "github.com/elastic/beats/v7/filebeat/input/v2" + inputcursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" + "github.com/elastic/beats/v7/libbeat/beat" + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/common/transport/tlscommon" + "github.com/elastic/beats/v7/libbeat/common/useragent" + "github.com/elastic/beats/v7/libbeat/logp" + "github.com/elastic/go-concert/ctxtool" + "github.com/elastic/go-concert/timed" +) + +const ( + inputName = "httpjson" +) + +var ( + userAgent = useragent.UserAgent("Filebeat") + + // for testing + timeNow = time.Now +) + +type retryLogger struct { + log *logp.Logger +} + +func newRetryLogger(log *logp.Logger) *retryLogger { + return &retryLogger{ + log: log.Named("retryablehttp").WithOptions(zap.AddCallerSkip(1)), + } +} + +func (log *retryLogger) Error(format string, args ...interface{}) { + log.log.Errorf(format, args...) +} + +func (log *retryLogger) Info(format string, args ...interface{}) { + log.log.Infof(format, args...) +} + +func (log *retryLogger) Debug(format string, args ...interface{}) { + log.log.Debugf(format, args...) +} + +func (log *retryLogger) Warn(format string, args ...interface{}) { + log.log.Warnf(format, args...) 
+} + +func newTLSConfig(config config) (*tlscommon.TLSConfig, error) { + if err := config.Validate(); err != nil { + return nil, err + } + + tlsConfig, err := tlscommon.LoadTLSConfig(config.Request.SSL) + if err != nil { + return nil, err + } + + return tlsConfig, nil +} + +func test(url *url.URL) error { + port := func() string { + if url.Port() != "" { + return url.Port() + } + switch url.Scheme { + case "https": + return "443" + } + return "80" + }() + + _, err := net.DialTimeout("tcp", net.JoinHostPort(url.Hostname(), port), time.Second) + if err != nil { + return fmt.Errorf("url %q is unreachable", url) + } + + return nil +} + +func run( + ctx v2.Context, + config config, + tlsConfig *tlscommon.TLSConfig, + publisher inputcursor.Publisher, + cursor *inputcursor.Cursor, +) error { + log := ctx.Logger.With("url", config.Request.URL) + + stdCtx := ctxtool.FromCanceller(ctx.Cancelation) + + httpClient, err := newHTTPClient(stdCtx, config, tlsConfig, log) + if err != nil { + return err + } + + requestFactory := newRequestFactory(config.Request, config.Auth, log) + pagination := newPagination(config, httpClient, log) + responseProcessor := newResponseProcessor(config.Response, pagination, log) + requester := newRequester(httpClient, requestFactory, responseProcessor, log) + + trCtx := emptyTransformContext() + trCtx.cursor = newCursor(config.Cursor, log) + trCtx.cursor.load(cursor) + + err = timed.Periodic(stdCtx, config.Interval, func() error { + log.Info("Process another repeated request.") + + if err := requester.doRequest(stdCtx, trCtx, publisher); err != nil { + log.Errorf("Error while processing http request: %v", err) + } + + if stdCtx.Err() != nil { + return err + } + + return nil + }) + + log.Infof("Input stopped because context was cancelled with: %v", err) + + return nil +} + +func newHTTPClient(ctx context.Context, config config, tlsConfig *tlscommon.TLSConfig, log *logp.Logger) (*httpClient, error) { + timeout := config.Request.getTimeout() + + // Make 
retryable HTTP client + client := &retryablehttp.Client{ + HTTPClient: &http.Client{ + Transport: &http.Transport{ + DialContext: (&net.Dialer{ + Timeout: timeout, + }).DialContext, + TLSClientConfig: tlsConfig.ToConfig(), + DisableKeepAlives: true, + }, + Timeout: timeout, + CheckRedirect: checkRedirect(config.Request, log), + }, + Logger: newRetryLogger(log), + RetryWaitMin: config.Request.Retry.getWaitMin(), + RetryWaitMax: config.Request.Retry.getWaitMax(), + RetryMax: config.Request.Retry.getMaxAttempts(), + CheckRetry: retryablehttp.DefaultRetryPolicy, + Backoff: retryablehttp.DefaultBackoff, + } + + limiter := newRateLimiterFromConfig(config.Request.RateLimit, log) + + if config.Auth.OAuth2.isEnabled() { + authClient, err := config.Auth.OAuth2.client(ctx, client.StandardClient()) + if err != nil { + return nil, err + } + return &httpClient{client: authClient, limiter: limiter}, nil + } + + return &httpClient{client: client.StandardClient(), limiter: limiter}, nil +} + +func checkRedirect(config *requestConfig, log *logp.Logger) func(*http.Request, []*http.Request) error { + return func(req *http.Request, via []*http.Request) error { + log.Debug("http client: checking redirect") + if len(via) >= config.RedirectMaxRedirects { + log.Debug("http client: max redirects exceeded") + return fmt.Errorf("stopped after %d redirects", config.RedirectMaxRedirects) + } + + if !config.RedirectForwardHeaders || len(via) == 0 { + log.Debugf("http client: nothing to do while checking redirects - forward_headers: %v, via: %#v", config.RedirectForwardHeaders, via) + return nil + } + + prev := via[len(via)-1] // previous request to get headers from + + log.Debugf("http client: forwarding headers from previous request: %#v", prev.Header) + req.Header = prev.Header.Clone() + + for _, k := range config.RedirectHeadersBanList { + log.Debugf("http client: ban header %v", k) + req.Header.Del(k) + } + + return nil + } +} + +func makeEvent(body common.MapStr) (beat.Event, error) { + 
bodyBytes, err := json.Marshal(body) + if err != nil { + return beat.Event{}, err + } + now := timeNow() + fields := common.MapStr{ + "event": common.MapStr{ + "created": now, + }, + "message": string(bodyBytes), + } + + return beat.Event{ + Timestamp: now, + Fields: fields, + }, nil +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/input_cursor.go b/x-pack/filebeat/input/httpjson/internal/v2/input_cursor.go new file mode 100644 index 000000000000..537e67762df8 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/input_cursor.go @@ -0,0 +1,67 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package v2 + +import ( + v2 "github.com/elastic/beats/v7/filebeat/input/v2" + inputcursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/common/transport/tlscommon" +) + +type cursorInput struct{} + +func (cursorInput) Name() string { + return "httpjson-cursor" +} + +type source struct { + config config + tlsConfig *tlscommon.TLSConfig +} + +func (src source) Name() string { + return src.config.Request.URL.String() +} + +func cursorConfigure(cfg *common.Config) ([]inputcursor.Source, inputcursor.Input, error) { + conf := defaultConfig() + if err := cfg.Unpack(&conf); err != nil { + return nil, nil, err + } + return newCursorInput(conf) +} + +func newCursorInput(config config) ([]inputcursor.Source, inputcursor.Input, error) { + tlsConfig, err := newTLSConfig(config) + if err != nil { + return nil, nil, err + } + // we only allow one url per config, if we wanted to allow more than one + // each source should hold only one url + return []inputcursor.Source{ + &source{config: config, + tlsConfig: tlsConfig, + }, + }, + &cursorInput{}, + nil +} + +func (in *cursorInput) 
Test(src inputcursor.Source, _ v2.TestContext) error { + return test((src.(*source)).config.Request.URL.URL) +} + +// Run starts the input and blocks until it ends the execution. +// It will return on context cancellation, any other error will be retried. +func (in *cursorInput) Run( + ctx v2.Context, + src inputcursor.Source, + cursor inputcursor.Cursor, + publisher inputcursor.Publisher, +) error { + s := src.(*source) + return run(ctx, s.config, s.tlsConfig, publisher, &cursor) +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/input_manager.go b/x-pack/filebeat/input/httpjson/internal/v2/input_manager.go new file mode 100644 index 000000000000..bec2991a71bd --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/input_manager.go @@ -0,0 +1,64 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package v2 + +import ( + "go.uber.org/multierr" + + "github.com/elastic/go-concert/unison" + + v2 "github.com/elastic/beats/v7/filebeat/input/v2" + inputcursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" + stateless "github.com/elastic/beats/v7/filebeat/input/v2/input-stateless" + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +// inputManager wraps one stateless input manager +// and one cursor input manager. It will create one or the other +// based on the config that is passed. 
+type InputManager struct { + stateless *stateless.InputManager + cursor *inputcursor.InputManager +} + +var _ v2.InputManager = InputManager{} + +func NewInputManager(log *logp.Logger, store inputcursor.StateStore) InputManager { + sim := stateless.NewInputManager(statelessConfigure) + return InputManager{ + stateless: &sim, + cursor: &inputcursor.InputManager{ + Logger: log, + StateStore: store, + Type: inputName, + Configure: cursorConfigure, + }, + } +} + +// Init initializes both wrapped input managers. +func (m InputManager) Init(grp unison.Group, mode v2.Mode) error { + registerRequestTransforms() + registerResponseTransforms() + registerPaginationTransforms() + return multierr.Append( + m.stateless.Init(grp, mode), + m.cursor.Init(grp, mode), + ) +} + +// Create creates a cursor input manager if the config has a date cursor set up, +// otherwise it creates a stateless input manager. +func (m InputManager) Create(cfg *common.Config) (v2.Input, error) { + config := defaultConfig() + if err := cfg.Unpack(&config); err != nil { + return nil, err + } + if len(config.Cursor) == 0 { + return m.stateless.Create(cfg) + } + return m.cursor.Create(cfg) +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/input_stateless.go b/x-pack/filebeat/input/httpjson/internal/v2/input_stateless.go new file mode 100644 index 000000000000..92a1b8ae2dd9 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/input_stateless.go @@ -0,0 +1,58 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package v2 + +import ( + v2 "github.com/elastic/beats/v7/filebeat/input/v2" + stateless "github.com/elastic/beats/v7/filebeat/input/v2/input-stateless" + "github.com/elastic/beats/v7/libbeat/beat" + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/common/transport/tlscommon" +) + +type statelessInput struct { + config config + tlsConfig *tlscommon.TLSConfig +} + +func (statelessInput) Name() string { + return "httpjson-stateless" +} + +func statelessConfigure(cfg *common.Config) (stateless.Input, error) { + conf := defaultConfig() + if err := cfg.Unpack(&conf); err != nil { + return nil, err + } + return newStatelessInput(conf) +} + +func newStatelessInput(config config) (*statelessInput, error) { + tlsConfig, err := newTLSConfig(config) + if err != nil { + return nil, err + } + return &statelessInput{config: config, tlsConfig: tlsConfig}, nil +} + +func (in *statelessInput) Test(v2.TestContext) error { + return test(in.config.Request.URL.URL) +} + +type statelessPublisher struct { + wrapped stateless.Publisher +} + +func (pub statelessPublisher) Publish(event beat.Event, _ interface{}) error { + pub.wrapped.Publish(event) + return nil +} + +// Run starts the input and blocks until it ends the execution. +// It will return on context cancellation, any other error will be retried. +func (in *statelessInput) Run(ctx v2.Context, publisher stateless.Publisher) error { + pub := statelessPublisher{wrapped: publisher} + return run(ctx, in.config, in.tlsConfig, pub, nil) +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/input_test.go b/x-pack/filebeat/input/httpjson/internal/v2/input_test.go new file mode 100644 index 000000000000..3f9fc342c1f9 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/input_test.go @@ -0,0 +1,519 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. 
Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package v2 + +import ( + "context" + "fmt" + "io/ioutil" + "math/rand" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "golang.org/x/sync/errgroup" + + v2 "github.com/elastic/beats/v7/filebeat/input/v2" + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" + beattest "github.com/elastic/beats/v7/libbeat/publisher/testing" +) + +func TestInput(t *testing.T) { + testCases := []struct { + name string + setupServer func(*testing.T, http.HandlerFunc, map[string]interface{}) + baseConfig map[string]interface{} + handler http.HandlerFunc + expected []string + }{ + { + name: "Test simple GET request", + setupServer: newTestServer(httptest.NewServer), + baseConfig: map[string]interface{}{ + "interval": 1, + "request.method": "GET", + }, + handler: defaultHandler("GET", ""), + expected: []string{`{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`}, + }, + { + name: "Test simple HTTPS GET request", + setupServer: newTestServer(httptest.NewTLSServer), + baseConfig: map[string]interface{}{ + "interval": 1, + "request.method": "GET", + "request.ssl.verification_mode": "none", + }, + handler: defaultHandler("GET", ""), + expected: []string{`{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`}, + }, + { + name: "Test request honors rate limit", + setupServer: newTestServer(httptest.NewServer), + baseConfig: map[string]interface{}{ + "interval": 1, + "http_method": "GET", + "request.rate_limit.limit": `{{.last_request.header.Get "X-Rate-Limit-Limit"}}`, + "request.rate_limit.remaining": `{{.last_request.header.Get "X-Rate-Limit-Remaining"}}`, + "request.rate_limit.reset": `{{.last_request.header.Get "X-Rate-Limit-Reset"}}`, + }, + handler: rateLimitHandler(), + expected: []string{`{"hello":"world"}`}, + }, + { + name: "Test request retries when failed", + 
setupServer: newTestServer(httptest.NewServer), + baseConfig: map[string]interface{}{ + "interval": 1, + "request.method": "GET", + }, + handler: retryHandler(), + expected: []string{`{"hello":"world"}`}, + }, + { + name: "Test POST request with body", + setupServer: newTestServer(httptest.NewServer), + baseConfig: map[string]interface{}{ + "interval": 1, + "request.method": "POST", + "request.body": map[string]interface{}{ + "test": "abc", + }, + }, + handler: defaultHandler("POST", `{"test":"abc"}`), + expected: []string{`{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`}, + }, + { + name: "Test repeated POST requests", + setupServer: newTestServer(httptest.NewServer), + baseConfig: map[string]interface{}{ + "interval": "100ms", + "request.method": "POST", + }, + handler: defaultHandler("POST", ""), + expected: []string{ + `{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`, + `{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`, + }, + }, + { + name: "Test split by json objects array", + setupServer: newTestServer(httptest.NewServer), + baseConfig: map[string]interface{}{ + "interval": 1, + "request.method": "GET", + "response.split": map[string]interface{}{ + "target": "body.hello", + }, + }, + handler: defaultHandler("GET", ""), + expected: []string{`{"world":"moon"}`, `{"space":[{"cake":"pumpkin"}]}`}, + }, + { + name: "Test split by json objects array with keep parent", + setupServer: newTestServer(httptest.NewServer), + baseConfig: map[string]interface{}{ + "interval": 1, + "request.method": "GET", + "response.split": map[string]interface{}{ + "target": "body.hello", + "keep_parent": true, + }, + }, + handler: defaultHandler("GET", ""), + expected: []string{ + `{"hello":{"world":"moon"}}`, + `{"hello":{"space":[{"cake":"pumpkin"}]}}`, + }, + }, + { + name: "Test nested split", + setupServer: newTestServer(httptest.NewServer), + baseConfig: map[string]interface{}{ + "interval": 1, + "request.method": "GET", + "response.split": 
map[string]interface{}{ + "target": "body.hello", + "split": map[string]interface{}{ + "target": "body.space", + "keep_parent": true, + }, + }, + }, + handler: defaultHandler("GET", ""), + expected: []string{ + `{"world":"moon"}`, + `{"space":{"cake":"pumpkin"}}`, + }, + }, + { + name: "Test split events by not found", + setupServer: newTestServer(httptest.NewServer), + baseConfig: map[string]interface{}{ + "interval": 1, + "request.method": "GET", + "response.split": map[string]interface{}{ + "target": "body.unknown", + }, + }, + handler: defaultHandler("GET", ""), + expected: []string{`{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`}, + }, + { + name: "Test date cursor", + setupServer: func(t *testing.T, h http.HandlerFunc, config map[string]interface{}) { + registerRequestTransforms() + t.Cleanup(func() { registeredTransforms = newRegistry() }) + // mock timeNow func to return a fixed value + timeNow = func() time.Time { + t, _ := time.Parse(time.RFC3339, "2002-10-02T15:00:00Z") + return t + } + + server := httptest.NewServer(h) + config["request.url"] = server.URL + t.Cleanup(server.Close) + t.Cleanup(func() { timeNow = time.Now }) + }, + baseConfig: map[string]interface{}{ + "interval": 1, + "request.method": "GET", + "request.transforms": []interface{}{ + map[string]interface{}{ + "set": map[string]interface{}{ + "target": "url.params.$filter", + "value": "alertCreationTime ge {{.cursor.timestamp}}", + "default": `alertCreationTime ge {{formatDate (now (parseDuration "-10m")) "2006-01-02T15:04:05Z"}}`, + }, + }, + }, + "cursor": map[string]interface{}{ + "timestamp": map[string]interface{}{ + "value": `{{index .last_response.body "@timestamp"}}`, + }, + }, + }, + handler: dateCursorHandler(), + expected: []string{ + `{"@timestamp":"2002-10-02T15:00:00Z","foo":"bar"}`, + `{"@timestamp":"2002-10-02T15:00:01Z","foo":"bar"}`, + `{"@timestamp":"2002-10-02T15:00:02Z","foo":"bar"}`, + }, + }, + { + name: "Test pagination", + setupServer: func(t 
*testing.T, h http.HandlerFunc, config map[string]interface{}) { + registerPaginationTransforms() + t.Cleanup(func() { registeredTransforms = newRegistry() }) + server := httptest.NewServer(h) + config["request.url"] = server.URL + t.Cleanup(server.Close) + }, + baseConfig: map[string]interface{}{ + "interval": 1, + "request.method": "GET", + "response.split": map[string]interface{}{ + "target": "body.items", + }, + "response.pagination": []interface{}{ + map[string]interface{}{ + "set": map[string]interface{}{ + "target": "url.params.page", + "value": "{{.last_response.body.nextPageToken}}", + }, + }, + }, + }, + handler: paginationHandler(), + expected: []string{`{"foo":"bar"}`, `{"foo":"bar"}`}, + }, + { + name: "Test pagination with array response", + setupServer: func(t *testing.T, h http.HandlerFunc, config map[string]interface{}) { + registerPaginationTransforms() + t.Cleanup(func() { registeredTransforms = newRegistry() }) + server := httptest.NewServer(h) + config["request.url"] = server.URL + t.Cleanup(server.Close) + }, + baseConfig: map[string]interface{}{ + "interval": 1, + "request.method": "GET", + "response.pagination": []interface{}{ + map[string]interface{}{ + "set": map[string]interface{}{ + "target": "url.params.page", + "value": `{{index (index .last_response.body 0) "nextPageToken"}}`, + }, + }, + }, + }, + handler: paginationArrayHandler(), + expected: []string{`{"nextPageToken":"bar","foo":"bar"}`, `{"foo":"bar"}`, `{"foo":"bar"}`}, + }, + { + name: "Test oauth2", + setupServer: func(t *testing.T, h http.HandlerFunc, config map[string]interface{}) { + server := httptest.NewServer(h) + config["request.url"] = server.URL + config["auth.oauth2.token_url"] = server.URL + "/token" + t.Cleanup(server.Close) + }, + baseConfig: map[string]interface{}{ + "interval": 1, + "request.method": "POST", + "auth.oauth2.client.id": "a_client_id", + "auth.oauth2.client.secret": "a_client_secret", + "auth.oauth2.endpoint_params": map[string]interface{}{ + 
"param1": "v1", + }, + "auth.oauth2.scopes": []string{"scope1", "scope2"}, + }, + handler: oauth2Handler, + expected: []string{`{"hello": "world"}`}, + }, + } + + for _, testCase := range testCases { + tc := testCase + t.Run(tc.name, func(t *testing.T) { + tc.setupServer(t, tc.handler, tc.baseConfig) + + cfg := common.MustNewConfigFrom(tc.baseConfig) + + conf := defaultConfig() + assert.NoError(t, cfg.Unpack(&conf)) + + input, err := newStatelessInput(conf) + + assert.NoError(t, err) + assert.Equal(t, "httpjson-stateless", input.Name()) + assert.NoError(t, input.Test(v2.TestContext{})) + + chanClient := beattest.NewChanClient(len(tc.expected)) + t.Cleanup(func() { _ = chanClient.Close() }) + + ctx, cancel := newV2Context() + t.Cleanup(cancel) + + var g errgroup.Group + g.Go(func() error { + return input.Run(ctx, chanClient) + }) + + timeout := time.NewTimer(5 * time.Second) + t.Cleanup(func() { _ = timeout.Stop() }) + + var receivedCount int + wait: + for { + select { + case <-timeout.C: + t.Errorf("timed out waiting for %d events", len(tc.expected)) + cancel() + return + case got := <-chanClient.Channel: + val, err := got.Fields.GetValue("message") + assert.NoError(t, err) + assert.JSONEq(t, tc.expected[receivedCount], val.(string)) + receivedCount += 1 + if receivedCount == len(tc.expected) { + cancel() + break wait + } + } + } + assert.NoError(t, g.Wait()) + }) + } +} + +func newTestServer( + newServer func(http.Handler) *httptest.Server, +) func(*testing.T, http.HandlerFunc, map[string]interface{}) { + return func(t *testing.T, h http.HandlerFunc, config map[string]interface{}) { + server := newServer(h) + config["request.url"] = server.URL + t.Cleanup(server.Close) + } +} + +func newV2Context() (v2.Context, func()) { + ctx, cancel := context.WithCancel(context.Background()) + return v2.Context{ + Logger: logp.NewLogger("httpjson_test"), + ID: "test_id", + Cancelation: ctx, + }, cancel +} + +func defaultHandler(expectedMethod, expectedBody string) 
// defaultHandler returns a handler that answers with a fixed JSON document.
// It replies 400 with a JSON error when the request method differs from
// expectedMethod or, if expectedBody is not empty, when the request body
// differs from it.
func defaultHandler(expectedMethod, expectedBody string) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("content-type", "application/json")
		msg := `{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`
		switch {
		case r.Method != expectedMethod:
			w.WriteHeader(http.StatusBadRequest)
			msg = fmt.Sprintf(`{"error":"expected method was %q"}`, expectedMethod)
		case expectedBody != "":
			body, _ := ioutil.ReadAll(r.Body)
			r.Body.Close()
			if expectedBody != string(body) {
				w.WriteHeader(http.StatusBadRequest)
				msg = fmt.Sprintf(`{"error":"expected body was %q"}`, expectedBody)
			}
		}

		_, _ = w.Write([]byte(msg))
	}
}

// rateLimitHandler returns a handler that answers the first request with 429
// and rate limit headers (reset set to the current Unix time), and every
// following request with a JSON document.
func rateLimitHandler() http.HandlerFunc {
	var isRetry bool
	return func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("content-type", "application/json")
		if isRetry {
			_, _ = w.Write([]byte(`{"hello":"world"}`))
			return
		}
		w.Header().Set("X-Rate-Limit-Limit", "0")
		w.Header().Set("X-Rate-Limit-Remaining", "0")
		w.Header().Set("X-Rate-Limit-Reset", fmt.Sprint(time.Now().Unix()))
		w.WriteHeader(http.StatusTooManyRequests)
		isRetry = true
		_, _ = w.Write([]byte(`{"error":"too many requests"}`))
	}
}

// retryHandler returns a handler that fails the first two requests with a
// random 5xx status and answers with a JSON document from the third request
// on.
func retryHandler() http.HandlerFunc {
	count := 0
	return func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("content-type", "application/json")
		if count == 2 {
			_, _ = w.Write([]byte(`{"hello":"world"}`))
			return
		}
		w.WriteHeader(rand.Intn(100) + 500)
		count++
	}
}

// oauth2TokenHandler plays the token endpoint: it checks method, grant type,
// client credentials, scope and the extra endpoint parameter of the token
// request, and hands out the bearer token "abcd" when all of them match.
func oauth2TokenHandler(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("content-type", "application/json")
	_ = r.ParseForm()
	switch {
	case r.Method != "POST":
		w.WriteHeader(http.StatusBadRequest)
		_, _ = w.Write([]byte(`{"error":"wrong method"}`))
	case r.FormValue("grant_type") != "client_credentials":
		w.WriteHeader(http.StatusBadRequest)
		_, _ = w.Write([]byte(`{"error":"wrong grant_type"}`))
	case r.FormValue("client_id") != "a_client_id":
		w.WriteHeader(http.StatusBadRequest)
		_, _ = w.Write([]byte(`{"error":"wrong client_id"}`))
	case r.FormValue("client_secret") != "a_client_secret":
		w.WriteHeader(http.StatusBadRequest)
		_, _ = w.Write([]byte(`{"error":"wrong client_secret"}`))
	case r.FormValue("scope") != "scope1 scope2":
		w.WriteHeader(http.StatusBadRequest)
		_, _ = w.Write([]byte(`{"error":"wrong scope"}`))
	case r.FormValue("param1") != "v1":
		w.WriteHeader(http.StatusBadRequest)
		_, _ = w.Write([]byte(`{"error":"wrong param1"}`))
	default:
		_, _ = w.Write([]byte(`{"token_type": "Bearer", "expires_in": "60", "access_token": "abcd"}`))
	}
}

// oauth2Handler serves the token endpoint under "/token" and, on every other
// path, a resource that requires a POST carrying the bearer token "abcd".
func oauth2Handler(w http.ResponseWriter, r *http.Request) {
	if r.URL.Path == "/token" {
		oauth2TokenHandler(w, r)
		return
	}

	w.Header().Set("content-type", "application/json")
	switch {
	case r.Method != "POST":
		w.WriteHeader(http.StatusBadRequest)
		_, _ = w.Write([]byte(`{"error":"wrong method"}`))
	case r.Header.Get("Authorization") != "Bearer abcd":
		w.WriteHeader(http.StatusBadRequest)
		_, _ = w.Write([]byte(`{"error":"wrong bearer"}`))
	default:
		_, _ = w.Write([]byte(`{"hello":"world"}`))
	}
}

// dateCursorHandler returns a handler that expects three consecutive requests
// whose "$filter" query parameter follows the timestamp of the previous
// response. A request with an unexpected filter gets a 400 with a JSON error
// and does not advance the sequence; requests after the third get an empty
// 200 response.
func dateCursorHandler() http.HandlerFunc {
	var count int
	return func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("content-type", "application/json")
		switch count {
		case 0:
			if r.URL.Query().Get("$filter") != "alertCreationTime ge 2002-10-02T14:50:00Z" {
				w.WriteHeader(http.StatusBadRequest)
				_, _ = w.Write([]byte(`{"error":"wrong initial cursor value"}`))
				return
			}
			_, _ = w.Write([]byte(`{"@timestamp":"2002-10-02T15:00:00Z","foo":"bar"}`))
		case 1:
			if r.URL.Query().Get("$filter") != "alertCreationTime ge 2002-10-02T15:00:00Z" {
				w.WriteHeader(http.StatusBadRequest)
				_, _ = w.Write([]byte(`{"error":"wrong cursor value"}`))
				return
			}
			_, _ = w.Write([]byte(`{"@timestamp":"2002-10-02T15:00:01Z","foo":"bar"}`))
		case 2:
			if r.URL.Query().Get("$filter") != "alertCreationTime ge 2002-10-02T15:00:01Z" {
				w.WriteHeader(http.StatusBadRequest)
				_, _ = w.Write([]byte(`{"error":"wrong cursor value"}`))
				return
			}
			_, _ = w.Write([]byte(`{"@timestamp":"2002-10-02T15:00:02Z","foo":"bar"}`))
		}
		count++
	}
}

// paginationHandler returns a handler that serves two pages of an object
// response. The first page carries the token "bar"; the second page is only
// served when the request passes that token in the "page" query parameter.
func paginationHandler() http.HandlerFunc {
	var count int
	return func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("content-type", "application/json")
		switch count {
		case 0:
			_, _ = w.Write([]byte(`{"@timestamp":"2002-10-02T15:00:00Z","nextPageToken":"bar","items":[{"foo":"bar"}]}`))
		case 1:
			if r.URL.Query().Get("page") != "bar" {
				w.WriteHeader(http.StatusBadRequest)
				_, _ = w.Write([]byte(`{"error":"wrong page token value"}`))
				return
			}
			_, _ = w.Write([]byte(`{"@timestamp":"2002-10-02T15:00:01Z","items":[{"foo":"bar"}]}`))
		}
		count++
	}
}

// paginationArrayHandler is the array flavoured variant of paginationHandler:
// both pages are JSON arrays and the token lives in the first element of the
// first page.
func paginationArrayHandler() http.HandlerFunc {
	var count int
	return func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("content-type", "application/json")
		switch count {
		case 0:
			_, _ = w.Write([]byte(`[{"nextPageToken":"bar","foo":"bar"},{"foo":"bar"}]`))
		case 1:
			if r.URL.Query().Get("page") != "bar" {
				w.WriteHeader(http.StatusBadRequest)
				_, _ = w.Write([]byte(`{"error":"wrong page token value"}`))
				return
			}
			_, _ = w.Write([]byte(`[{"foo":"bar"}]`))
		}
		count++
	}
}
+ +package v2 + +import ( + "context" + "encoding/json" + "io/ioutil" + "net/http" + "net/url" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +const paginationNamespace = "pagination" + +func registerPaginationTransforms() { + registerTransform(paginationNamespace, appendName, newAppendPagination) + registerTransform(paginationNamespace, deleteName, newDeletePagination) + registerTransform(paginationNamespace, setName, newSetPagination) +} + +type pagination struct { + log *logp.Logger + httpClient *httpClient + requestFactory *requestFactory +} + +func newPagination(config config, httpClient *httpClient, log *logp.Logger) *pagination { + pagination := &pagination{httpClient: httpClient, log: log} + if config.Response == nil || len(config.Response.Pagination) == 0 { + return pagination + } + + rts, _ := newBasicTransformsFromConfig(config.Request.Transforms, requestNamespace, log) + pts, _ := newBasicTransformsFromConfig(config.Response.Pagination, paginationNamespace, log) + requestFactory := newPaginationRequestFactory( + config.Request.Method, + *config.Request.URL.URL, + append(rts, pts...), + config.Auth, + log, + ) + pagination.requestFactory = requestFactory + return pagination +} + +func newPaginationRequestFactory(method string, url url.URL, ts []basicTransform, authConfig *authConfig, log *logp.Logger) *requestFactory { + // config validation already checked for errors here + rf := &requestFactory{ + url: url, + method: method, + body: &common.MapStr{}, + transforms: ts, + log: log, + } + if authConfig != nil && authConfig.Basic.isEnabled() { + rf.user = authConfig.Basic.User + rf.password = authConfig.Basic.Password + } + return rf +} + +type pageIterator struct { + pagination *pagination + + stdCtx context.Context + trCtx *transformContext + + resp *http.Response + + isFirst bool + done bool + + n int +} + +func (p *pagination) newPageIterator(stdCtx context.Context, trCtx *transformContext, resp 
*http.Response) *pageIterator { + return &pageIterator{ + pagination: p, + stdCtx: stdCtx, + trCtx: trCtx, + resp: resp, + isFirst: true, + } +} + +func (iter *pageIterator) next() (*response, bool, error) { + if iter == nil || iter.resp == nil || iter.done { + return nil, false, nil + } + + if iter.isFirst { + iter.isFirst = false + tr, err := iter.getPage() + if err != nil { + return nil, false, err + } + if iter.pagination.requestFactory == nil { + iter.done = true + } + return tr, true, nil + } + + httpReq, err := iter.pagination.requestFactory.newHTTPRequest(iter.stdCtx, iter.trCtx) + if err != nil { + if err == errNewURLValueNotSet { + // if this error happens here it means the transform used to pick the new url.value + // did not find any new value and we can stop paginating without error + iter.done = true + return nil, false, nil + } + return nil, false, err + } + + resp, err := iter.pagination.httpClient.do(iter.stdCtx, iter.trCtx, httpReq) + if err != nil { + return nil, false, err + } + + iter.resp = resp + + r, err := iter.getPage() + if err != nil { + return nil, false, err + } + + if r.body == nil { + iter.pagination.log.Debug("finished pagination because there is no body") + iter.done = true + return nil, false, nil + } + + return r, true, nil +} + +func (iter *pageIterator) getPage() (*response, error) { + bodyBytes, err := ioutil.ReadAll(iter.resp.Body) + if err != nil { + return nil, err + } + iter.resp.Body.Close() + + var r response + r.header = iter.resp.Header + r.url = *iter.resp.Request.URL + r.page = iter.n + + if len(bodyBytes) > 0 { + if err := json.Unmarshal(bodyBytes, &r.body); err != nil { + return nil, err + } + } + + iter.n += 1 + + return &r, nil +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/rate_limiter.go b/x-pack/filebeat/input/httpjson/internal/v2/rate_limiter.go new file mode 100644 index 000000000000..5c7e2c16a985 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/rate_limiter.go @@ -0,0 +1,140 @@ +// 
Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package v2 + +import ( + "context" + "errors" + "fmt" + "net/http" + "strconv" + "time" + + "github.com/elastic/beats/v7/libbeat/logp" +) + +type rateLimiter struct { + log *logp.Logger + + limit *valueTpl + reset *valueTpl + remaining *valueTpl +} + +func newRateLimiterFromConfig(config *rateLimitConfig, log *logp.Logger) *rateLimiter { + if config == nil { + return nil + } + + return &rateLimiter{ + log: log, + limit: config.Limit, + reset: config.Reset, + remaining: config.Remaining, + } +} + +func (r *rateLimiter) execute(ctx context.Context, f func() (*http.Response, error)) (*http.Response, error) { + for { + resp, err := f() + if err != nil { + return nil, err + } + + if err != nil { + return nil, fmt.Errorf("failed to read http.response.body: %w", err) + } + + if r == nil || resp.StatusCode == http.StatusOK { + return resp, nil + } + + if resp.StatusCode != http.StatusTooManyRequests { + return nil, fmt.Errorf("http request was unsuccessful with a status code %d", resp.StatusCode) + } + + if err := r.applyRateLimit(ctx, resp); err != nil { + return nil, err + } + } +} + +// applyRateLimit applies appropriate rate limit if specified in the HTTP Header of the response +func (r *rateLimiter) applyRateLimit(ctx context.Context, resp *http.Response) error { + epoch, err := r.getRateLimit(resp) + if err != nil { + return err + } + + t := time.Unix(epoch, 0) + w := time.Until(t) + if epoch == 0 || w <= 0 { + r.log.Debugf("Rate Limit: No need to apply rate limit.") + return nil + } + r.log.Debugf("Rate Limit: Wait until %v for the rate limit to reset.", t) + ticker := time.NewTicker(w) + defer ticker.Stop() + + select { + case <-ctx.Done(): + r.log.Info("Context done.") + return nil + case <-ticker.C: + r.log.Debug("Rate Limit: time 
is up.") + return nil + } +} + +// getRateLimit gets the rate limit value if specified in the response, +// and returns an int64 value in seconds since unix epoch for rate limit reset time. +// When there is a remaining rate limit quota, or when the rate limit reset time has expired, it +// returns 0 for the epoch value. +func (r *rateLimiter) getRateLimit(resp *http.Response) (int64, error) { + if r == nil { + return 0, nil + } + + if r.remaining == nil { + return 0, nil + } + + tr := transformable{} + tr.setHeader(resp.Header) + + remaining := r.remaining.Execute(emptyTransformContext(), tr, nil, r.log) + if remaining == "" { + return 0, errors.New("remaining value is empty") + } + m, err := strconv.ParseInt(remaining, 10, 64) + if err != nil { + return 0, fmt.Errorf("failed to parse rate-limit remaining value: %w", err) + } + + if m != 0 { + return 0, nil + } + + if r.reset == nil { + r.log.Warn("reset rate limit is not set") + return 0, nil + } + + reset := r.reset.Execute(emptyTransformContext(), tr, nil, r.log) + if reset == "" { + return 0, errors.New("reset value is empty") + } + + epoch, err := strconv.ParseInt(reset, 10, 64) + if err != nil { + return 0, fmt.Errorf("failed to parse rate-limit reset value: %w", err) + } + + if timeNow().Unix() > epoch { + return 0, nil + } + + return epoch, nil +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/rate_limiter_test.go b/x-pack/filebeat/input/httpjson/internal/v2/rate_limiter_test.go new file mode 100644 index 000000000000..cdaa4398d8aa --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/rate_limiter_test.go @@ -0,0 +1,90 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package v2 + +import ( + "net/http" + "strconv" + "testing" + "time" + + "github.com/stretchr/testify/assert" + + "github.com/elastic/beats/v7/libbeat/logp" +) + +// Test getRateLimit function with a remaining quota, expect to receive 0, nil. +func TestGetRateLimitReturns0IfRemainingQuota(t *testing.T) { + header := make(http.Header) + header.Add("X-Rate-Limit-Limit", "120") + header.Add("X-Rate-Limit-Remaining", "118") + header.Add("X-Rate-Limit-Reset", "1581658643") + tplLimit := &valueTpl{} + tplReset := &valueTpl{} + tplRemaining := &valueTpl{} + assert.NoError(t, tplLimit.Unpack(`{{.header.Get "X-Rate-Limit-Limit"}}`)) + assert.NoError(t, tplReset.Unpack(`{{.header.Get "X-Rate-Limit-Reset"}}`)) + assert.NoError(t, tplRemaining.Unpack(`{{.header.Get "X-Rate-Limit-Remaining"}}`)) + rateLimit := &rateLimiter{ + limit: tplLimit, + reset: tplReset, + remaining: tplRemaining, + log: logp.NewLogger(""), + } + resp := &http.Response{Header: header} + epoch, err := rateLimit.getRateLimit(resp) + assert.NoError(t, err) + assert.EqualValues(t, 0, epoch) +} + +func TestGetRateLimitReturns0IfEpochInPast(t *testing.T) { + header := make(http.Header) + header.Add("X-Rate-Limit-Limit", "10") + header.Add("X-Rate-Limit-Remaining", "0") + header.Add("X-Rate-Limit-Reset", "1581658643") + tplLimit := &valueTpl{} + tplReset := &valueTpl{} + tplRemaining := &valueTpl{} + assert.NoError(t, tplLimit.Unpack(`{{.header.Get "X-Rate-Limit-Limit"}}`)) + assert.NoError(t, tplReset.Unpack(`{{.header.Get "X-Rate-Limit-Reset"}}`)) + assert.NoError(t, tplRemaining.Unpack(`{{.header.Get "X-Rate-Limit-Remaining"}}`)) + rateLimit := &rateLimiter{ + limit: tplLimit, + reset: tplReset, + remaining: tplRemaining, + log: logp.NewLogger(""), + } + resp := &http.Response{Header: header} + epoch, err := rateLimit.getRateLimit(resp) + assert.NoError(t, err) + assert.EqualValues(t, 0, epoch) +} + +func TestGetRateLimitReturnsResetValue(t *testing.T) { + epoch := int64(1604582732 + 100) + timeNow = 
func() time.Time { return time.Unix(1604582732, 0).UTC() } + t.Cleanup(func() { timeNow = time.Now }) + + header := make(http.Header) + header.Add("X-Rate-Limit-Limit", "10") + header.Add("X-Rate-Limit-Remaining", "0") + header.Add("X-Rate-Limit-Reset", strconv.FormatInt(epoch, 10)) + tplLimit := &valueTpl{} + tplReset := &valueTpl{} + tplRemaining := &valueTpl{} + assert.NoError(t, tplLimit.Unpack(`{{.header.Get "X-Rate-Limit-Limit"}}`)) + assert.NoError(t, tplReset.Unpack(`{{.header.Get "X-Rate-Limit-Reset"}}`)) + assert.NoError(t, tplRemaining.Unpack(`{{.header.Get "X-Rate-Limit-Remaining"}}`)) + rateLimit := &rateLimiter{ + limit: tplLimit, + reset: tplReset, + remaining: tplRemaining, + log: logp.NewLogger(""), + } + resp := &http.Response{Header: header} + epoch2, err := rateLimit.getRateLimit(resp) + assert.NoError(t, err) + assert.EqualValues(t, 1604582832, epoch2) +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/request.go b/x-pack/filebeat/input/httpjson/internal/v2/request.go new file mode 100644 index 000000000000..f0eab7108579 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/request.go @@ -0,0 +1,205 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package v2 + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io/ioutil" + "net/http" + "net/url" + + inputcursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +const requestNamespace = "request" + +func registerRequestTransforms() { + registerTransform(requestNamespace, appendName, newAppendRequest) + registerTransform(requestNamespace, deleteName, newDeleteRequest) + registerTransform(requestNamespace, setName, newSetRequest) +} + +type httpClient struct { + client *http.Client + limiter *rateLimiter +} + +func (c *httpClient) do(stdCtx context.Context, trCtx *transformContext, req *http.Request) (*http.Response, error) { + resp, err := c.limiter.execute(stdCtx, func() (*http.Response, error) { + return c.client.Do(req) + }) + if err != nil { + return nil, fmt.Errorf("failed to execute http client.Do: %w", err) + } + if resp.StatusCode > 399 { + body, _ := ioutil.ReadAll(resp.Body) + resp.Body.Close() + return nil, fmt.Errorf("server responded with status code %d: %s", resp.StatusCode, string(body)) + } + return resp, nil +} + +func (rf *requestFactory) newRequest(ctx *transformContext) (transformable, error) { + req := transformable{} + req.setURL(rf.url) + + if rf.body != nil && len(*rf.body) > 0 { + req.setBody(rf.body.Clone()) + } + + header := http.Header{} + header.Set("Accept", "application/json") + header.Set("User-Agent", userAgent) + if rf.method == "POST" { + header.Set("Content-Type", "application/json") + } + req.setHeader(header) + + var err error + for _, t := range rf.transforms { + req, err = t.run(ctx, req) + if err != nil { + return transformable{}, err + } + } + + rf.log.Debugf("new request: %#v", req) + + return req, nil +} + +type requestFactory struct { + url url.URL + method string + body *common.MapStr + transforms []basicTransform + user string + password string + log *logp.Logger +} + +func 
newRequestFactory(config *requestConfig, authConfig *authConfig, log *logp.Logger) *requestFactory { + // config validation already checked for errors here + ts, _ := newBasicTransformsFromConfig(config.Transforms, requestNamespace, log) + rf := &requestFactory{ + url: *config.URL.URL, + method: config.Method, + body: config.Body, + transforms: ts, + log: log, + } + if authConfig != nil && authConfig.Basic.isEnabled() { + rf.user = authConfig.Basic.User + rf.password = authConfig.Basic.Password + } + return rf +} + +func (rf *requestFactory) newHTTPRequest(stdCtx context.Context, trCtx *transformContext) (*http.Request, error) { + trReq, err := rf.newRequest(trCtx) + if err != nil { + return nil, err + } + + var body []byte + if len(trReq.body()) > 0 { + switch rf.method { + case "POST": + body, err = json.Marshal(trReq.body()) + if err != nil { + return nil, err + } + default: + rf.log.Errorf("A body is set, but method is not POST. The body will be ignored.") + } + } + + url := trReq.url() + req, err := http.NewRequest(rf.method, url.String(), bytes.NewBuffer(body)) + if err != nil { + return nil, err + } + + req = req.WithContext(stdCtx) + + req.Header = trReq.header().Clone() + + if rf.user != "" || rf.password != "" { + req.SetBasicAuth(rf.user, rf.password) + } + + return req, nil +} + +type requester struct { + log *logp.Logger + client *httpClient + requestFactory *requestFactory + responseProcessor *responseProcessor +} + +func newRequester( + client *httpClient, + requestFactory *requestFactory, + responseProcessor *responseProcessor, + log *logp.Logger) *requester { + return &requester{ + log: log, + client: client, + requestFactory: requestFactory, + responseProcessor: responseProcessor, + } +} + +func (r *requester) doRequest(stdCtx context.Context, trCtx *transformContext, publisher inputcursor.Publisher) error { + req, err := r.requestFactory.newHTTPRequest(stdCtx, trCtx) + if err != nil { + return fmt.Errorf("failed to create http request: %w", err) 
+ } + + httpResp, err := r.client.do(stdCtx, trCtx, req) + if err != nil { + return fmt.Errorf("failed to execute http client.Do: %w", err) + } + defer httpResp.Body.Close() + + eventsCh, err := r.responseProcessor.startProcessing(stdCtx, trCtx, httpResp) + if err != nil { + return err + } + + var n int + for maybeMsg := range eventsCh { + if maybeMsg.failed() { + r.log.Errorf("error processing response: %v", maybeMsg) + continue + } + + event, err := makeEvent(maybeMsg.msg) + if err != nil { + r.log.Errorf("error creating event: %v", maybeMsg) + continue + } + + if err := publisher.Publish(event, trCtx.cursorMap()); err != nil { + r.log.Errorf("error publishing event: %v", err) + continue + } + + trCtx.updateLastEvent(maybeMsg.msg) + n += 1 + } + + trCtx.updateCursor() + + r.log.Infof("request finished: %d events published", n) + + return nil +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/response.go b/x-pack/filebeat/input/httpjson/internal/v2/response.go new file mode 100644 index 000000000000..cc5f6605bc5d --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/response.go @@ -0,0 +1,178 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package v2 + +import ( + "context" + "net/http" + "net/url" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +const responseNamespace = "response" + +func registerResponseTransforms() { + registerTransform(responseNamespace, appendName, newAppendResponse) + registerTransform(responseNamespace, deleteName, newDeleteResponse) + registerTransform(responseNamespace, setName, newSetResponse) +} + +type response struct { + page int + url url.URL + header http.Header + body interface{} +} + +func (resp *response) clone() *response { + clone := &response{ + page: resp.page, + header: resp.header.Clone(), + url: resp.url, + } + + switch t := resp.body.(type) { + case []interface{}: + c := make([]interface{}, len(t)) + copy(c, t) + clone.body = c + case common.MapStr: + clone.body = t.Clone() + case map[string]interface{}: + clone.body = common.MapStr(t).Clone() + } + + return clone +} + +type responseProcessor struct { + log *logp.Logger + transforms []basicTransform + split *split + pagination *pagination +} + +func newResponseProcessor(config *responseConfig, pagination *pagination, log *logp.Logger) *responseProcessor { + rp := &responseProcessor{ + pagination: pagination, + log: log, + } + if config == nil { + return rp + } + ts, _ := newBasicTransformsFromConfig(config.Transforms, responseNamespace, log) + rp.transforms = ts + + split, _ := newSplitResponse(config.Split, log) + + rp.split = split + + return rp +} + +func (rp *responseProcessor) startProcessing(stdCtx context.Context, trCtx *transformContext, resp *http.Response) (<-chan maybeMsg, error) { + ch := make(chan maybeMsg) + + go func() { + defer close(ch) + + iter := rp.pagination.newPageIterator(stdCtx, trCtx, resp) + for { + page, hasNext, err := iter.next() + if err != nil { + ch <- maybeMsg{err: err} + return + } + + if !hasNext { + return + } + + respTrs := page.asTransformables(rp.log) + + if len(respTrs) == 0 { + return + } + + 
trCtx.updateLastResponse(*page) + + rp.log.Debugf("last received page: %#v", trCtx.lastResponse) + + for _, tr := range respTrs { + for _, t := range rp.transforms { + tr, err = t.run(trCtx, tr) + if err != nil { + ch <- maybeMsg{err: err} + return + } + } + + if rp.split == nil { + ch <- maybeMsg{msg: tr.body()} + rp.log.Debug("no split found: continuing") + continue + } + + if err := rp.split.run(trCtx, tr, ch); err != nil { + if err == errEmptyField { + // nothing else to send for this page + rp.log.Debug("split operation finished") + continue + } + rp.log.Debug("split operation failed") + ch <- maybeMsg{err: err} + return + } + } + } + }() + + return ch, nil +} + +func (resp *response) asTransformables(log *logp.Logger) []transformable { + var ts []transformable + + convertAndAppend := func(m map[string]interface{}) { + tr := transformable{} + tr.setHeader(resp.header.Clone()) + tr.setURL(resp.url) + tr.setBody(common.MapStr(m).Clone()) + ts = append(ts, tr) + } + + switch tresp := resp.body.(type) { + case []interface{}: + for _, v := range tresp { + m, ok := v.(map[string]interface{}) + if !ok { + log.Debugf("events must be JSON objects, but got %T: skipping", v) + continue + } + convertAndAppend(m) + } + case map[string]interface{}: + convertAndAppend(tresp) + default: + log.Debugf("response is not a valid JSON") + } + + return ts +} + +func (resp *response) templateValues() common.MapStr { + if resp == nil { + return common.MapStr{} + } + return common.MapStr{ + "header": resp.header.Clone(), + "page": resp.page, + "url.value": resp.url.String(), + "params": resp.url.Query(), + "body": resp.body, + } +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/split.go b/x-pack/filebeat/input/httpjson/internal/v2/split.go new file mode 100644 index 000000000000..17d5c2a7c9bd --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/split.go @@ -0,0 +1,185 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package v2 + +import ( + "errors" + "fmt" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +var ( + errEmptyField = errors.New("the requested field is empty") + errExpectedSplitArr = errors.New("split was expecting field to be an array") + errExpectedSplitObj = errors.New("split was expecting field to be an object") +) + +type split struct { + log *logp.Logger + targetInfo targetInfo + kind string + transforms []basicTransform + child *split + keepParent bool + keyField string +} + +func newSplitResponse(cfg *splitConfig, log *logp.Logger) (*split, error) { + if cfg == nil { + return nil, nil + } + + split, err := newSplit(cfg, log) + if err != nil { + return nil, err + } + + if split.targetInfo.Type != targetBody { + return nil, fmt.Errorf("invalid target type: %s", split.targetInfo.Type) + } + + return split, nil +} + +func newSplit(c *splitConfig, log *logp.Logger) (*split, error) { + ti, err := getTargetInfo(c.Target) + if err != nil { + return nil, err + } + + ts, err := newBasicTransformsFromConfig(c.Transforms, responseNamespace, log) + if err != nil { + return nil, err + } + + var s *split + if c.Split != nil { + s, err = newSplitResponse(c.Split, log) + if err != nil { + return nil, err + } + } + + return &split{ + log: log, + targetInfo: ti, + kind: c.Type, + keepParent: c.KeepParent, + keyField: c.KeyField, + transforms: ts, + child: s, + }, nil +} + +func (s *split) run(ctx *transformContext, resp transformable, ch chan<- maybeMsg) error { + root := resp.body() + return s.split(ctx, root, ch) +} + +func (s *split) split(ctx *transformContext, root common.MapStr, ch chan<- maybeMsg) error { + v, err := root.GetValue(s.targetInfo.Name) + if err != nil && err != common.ErrKeyNotFound { + return err + } + + if v == nil { + ch <- 
maybeMsg{msg: root} + return errEmptyField + } + + switch s.kind { + case "", splitTypeArr: + varr, ok := v.([]interface{}) + if !ok { + return errExpectedSplitArr + } + + if len(varr) == 0 { + ch <- maybeMsg{msg: root} + return errEmptyField + } + + for _, e := range varr { + if err := s.sendMessage(ctx, root, "", e, ch); err != nil { + s.log.Debug(err) + } + } + + return nil + case splitTypeMap: + vmap, ok := toMapStr(v) + if !ok { + return errExpectedSplitObj + } + + if len(vmap) == 0 { + ch <- maybeMsg{msg: root} + return errEmptyField + } + + for k, e := range vmap { + if err := s.sendMessage(ctx, root, k, e, ch); err != nil { + s.log.Debug(err) + } + } + + return nil + } + + return errors.New("unknown split type") +} + +func (s *split) sendMessage(ctx *transformContext, root common.MapStr, key string, v interface{}, ch chan<- maybeMsg) error { + obj, ok := toMapStr(v) + if !ok { + return errExpectedSplitObj + } + + clone := root.Clone() + + if s.keyField != "" && key != "" { + _, _ = obj.Put(s.keyField, key) + } + + if s.keepParent { + _, _ = clone.Put(s.targetInfo.Name, obj) + } else { + clone = obj + } + + tr := transformable{} + tr.setBody(clone) + + var err error + for _, t := range s.transforms { + tr, err = t.run(ctx, tr) + if err != nil { + return err + } + } + + if s.child != nil { + return s.child.split(ctx, clone, ch) + } + + ch <- maybeMsg{msg: clone} + + return nil +} + +func toMapStr(v interface{}) (common.MapStr, bool) { + if v == nil { + return common.MapStr{}, false + } + switch t := v.(type) { + case common.MapStr: + return t, true + case map[string]interface{}: + return common.MapStr(t), true + } + return common.MapStr{}, false +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/split_test.go b/x-pack/filebeat/input/httpjson/internal/v2/split_test.go new file mode 100644 index 000000000000..2e1f7c59b700 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/split_test.go @@ -0,0 +1,362 @@ +// Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package v2 + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +func TestSplit(t *testing.T) { + registerResponseTransforms() + t.Cleanup(func() { registeredTransforms = newRegistry() }) + cases := []struct { + name string + config *splitConfig + ctx *transformContext + resp transformable + expectedMessages []common.MapStr + expectedErr error + }{ + { + name: "Two nested Split Arrays with keep_parent", + config: &splitConfig{ + Target: "body.alerts", + Type: "array", + KeepParent: true, + Split: &splitConfig{ + Target: "body.alerts.entities", + Type: "array", + KeepParent: true, + }, + }, + ctx: emptyTransformContext(), + resp: transformable{ + "body": common.MapStr{ + "this": "is kept", + "alerts": []interface{}{ + map[string]interface{}{ + "this_is": "also kept", + "entities": []interface{}{ + map[string]interface{}{ + "something": "something", + }, + map[string]interface{}{ + "else": "else", + }, + }, + }, + map[string]interface{}{ + "this_is": "also kept 2", + "entities": []interface{}{ + map[string]interface{}{ + "something": "something 2", + }, + map[string]interface{}{ + "else": "else 2", + }, + }, + }, + }, + }, + }, + expectedMessages: []common.MapStr{ + { + "this": "is kept", + "alerts.this_is": "also kept", + "alerts.entities.something": "something", + }, + { + "this": "is kept", + "alerts.this_is": "also kept", + "alerts.entities.else": "else", + }, + { + "this": "is kept", + "alerts.this_is": "also kept 2", + "alerts.entities.something": "something 2", + }, + { + "this": "is kept", + "alerts.this_is": "also kept 2", + "alerts.entities.else": "else 2", + }, + }, + expectedErr: nil, + }, + { + name: "A nested array with a nested map", + 
config: &splitConfig{ + Target: "body.alerts", + Type: "array", + KeepParent: false, + Split: &splitConfig{ + Target: "body.entities", + Type: "map", + KeepParent: true, + KeyField: "id", + }, + }, + ctx: emptyTransformContext(), + resp: transformable{ + "body": common.MapStr{ + "this": "is not kept", + "alerts": []interface{}{ + map[string]interface{}{ + "this_is": "kept", + "entities": map[string]interface{}{ + "id1": map[string]interface{}{ + "something": "else", + }, + }, + }, + map[string]interface{}{ + "this_is": "also kept", + "entities": map[string]interface{}{ + "id2": map[string]interface{}{ + "something": "else 2", + }, + }, + }, + }, + }, + }, + expectedMessages: []common.MapStr{ + { + "this_is": "kept", + "entities.id": "id1", + "entities.something": "else", + }, + { + "this_is": "also kept", + "entities.id": "id2", + "entities.something": "else 2", + }, + }, + expectedErr: nil, + }, + { + name: "A nested array with a nested map with transforms", + config: &splitConfig{ + Target: "body.alerts", + Type: "array", + Split: &splitConfig{ + Target: "body.entities", + Type: "map", + Transforms: transformsConfig{ + common.MustNewConfigFrom(map[string]interface{}{ + "set": map[string]interface{}{ + "target": "body.foo", + "value": "set for each", + }, + }), + }, + }, + }, + ctx: emptyTransformContext(), + resp: transformable{ + "body": common.MapStr{ + "this": "is not kept", + "alerts": []interface{}{ + map[string]interface{}{ + "this_is": "kept", + "entities": map[string]interface{}{ + "id1": map[string]interface{}{ + "something": "else", + }, + }, + }, + map[string]interface{}{ + "this_is": "also not kept", + "entities": map[string]interface{}{ + "id2": map[string]interface{}{ + "something": "else 2", + }, + }, + }, + }, + }, + }, + expectedMessages: []common.MapStr{ + { + "something": "else", + "foo": "set for each", + }, + { + "something": "else 2", + "foo": "set for each", + }, + }, + expectedErr: nil, + }, + { + name: "A nested array with a nested array 
in an object", + config: &splitConfig{ + Target: "body.response", + Type: "array", + Split: &splitConfig{ + Target: "body.Event.Attributes", + KeepParent: true, + }, + }, + ctx: emptyTransformContext(), + resp: transformable{ + "body": common.MapStr{ + "response": []interface{}{ + map[string]interface{}{ + "Event": map[string]interface{}{ + "timestamp": "1606324417", + "Attributes": []interface{}{ + map[string]interface{}{ + "key": "value", + }, + map[string]interface{}{ + "key2": "value2", + }, + }, + }, + }, + }, + }, + }, + expectedMessages: []common.MapStr{ + { + "Event": common.MapStr{ + "timestamp": "1606324417", + "Attributes": common.MapStr{ + "key": "value", + }, + }, + }, + { + "Event": common.MapStr{ + "timestamp": "1606324417", + "Attributes": common.MapStr{ + "key2": "value2", + }, + }, + }, + }, + expectedErr: nil, + }, + { + name: "A nested array with an empty nested array in an object publishes without the key", + config: &splitConfig{ + Target: "body.response", + Type: "array", + Split: &splitConfig{ + Target: "body.Event.Attributes", + KeepParent: true, + }, + }, + ctx: emptyTransformContext(), + resp: transformable{ + "body": common.MapStr{ + "response": []interface{}{ + map[string]interface{}{ + "Event": map[string]interface{}{ + "timestamp": "1606324417", + }, + }, + }, + }, + }, + expectedMessages: []common.MapStr{ + { + "Event": common.MapStr{ + "timestamp": "1606324417", + }, + }, + }, + }, + { + name: "First level split skips publish if no events and keep_parent: false", + config: &splitConfig{ + Target: "body.response", + Type: "array", + Split: &splitConfig{ + Target: "body.Event.Attributes", + KeepParent: false, + }, + }, + ctx: emptyTransformContext(), + resp: transformable{ + "body": common.MapStr{ + "response": []interface{}{}, + }, + }, + expectedMessages: []common.MapStr{ + {"response": []interface{}{}}, + }, + expectedErr: errEmptyField, + }, + { + name: "Changes must be local to parent when nested splits", + config: &splitConfig{ 
+ Target: "body.items", + Type: "array", + Split: &splitConfig{ + Target: "body.splitHere.splitMore", + Type: "array", + KeepParent: true, + }, + }, + ctx: emptyTransformContext(), + resp: transformable{ + "body": common.MapStr{ + "@timestamp": "1234567890", + "nextPageToken": "tok", + "items": []interface{}{ + common.MapStr{"foo": "bar"}, + common.MapStr{ + "baz": "buzz", + "splitHere": common.MapStr{ + "splitMore": []interface{}{ + common.MapStr{ + "deepest1": "data", + }, + common.MapStr{ + "deepest2": "data", + }, + }, + }, + }, + }, + }, + }, + expectedMessages: []common.MapStr{ + {"foo": "bar"}, + {"baz": "buzz", "splitHere": common.MapStr{"splitMore": common.MapStr{"deepest1": "data"}}}, + {"baz": "buzz", "splitHere": common.MapStr{"splitMore": common.MapStr{"deepest2": "data"}}}, + }, + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + ch := make(chan maybeMsg, len(tc.expectedMessages)) + split, err := newSplitResponse(tc.config, logp.NewLogger("")) + assert.NoError(t, err) + err = split.run(tc.ctx, tc.resp, ch) + if tc.expectedErr == nil { + assert.NoError(t, err) + } else { + assert.EqualError(t, err, tc.expectedErr.Error()) + } + close(ch) + assert.Equal(t, len(tc.expectedMessages), len(ch)) + for _, msg := range tc.expectedMessages { + e := <-ch + assert.NoError(t, e.err) + assert.Equal(t, msg.Flatten(), e.msg.Flatten()) + } + }) + } +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/testdata/credentials.json b/x-pack/filebeat/input/httpjson/internal/v2/testdata/credentials.json new file mode 100644 index 000000000000..2b5fdd89e5cf --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/testdata/credentials.json @@ -0,0 +1,7 @@ +{ + "type": "service_account", + "project_id": "foo", + "private_key_id": "x", + "client_email": "foo@bar.com", + "client_id": "0" +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/testdata/invalid_credentials.json 
b/x-pack/filebeat/input/httpjson/internal/v2/testdata/invalid_credentials.json new file mode 100644 index 000000000000..9977a2836c1a --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/testdata/invalid_credentials.json @@ -0,0 +1 @@ +invalid diff --git a/x-pack/filebeat/input/httpjson/internal/v2/transform.go b/x-pack/filebeat/input/httpjson/internal/v2/transform.go new file mode 100644 index 000000000000..d6ca03a84f2b --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/transform.go @@ -0,0 +1,224 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package v2 + +import ( + "fmt" + "net/http" + "net/url" + "sync" + + "github.com/pkg/errors" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +const logName = "httpjson.transforms" + +type transformsConfig []*common.Config + +type transforms []transform + +type transformContext struct { + lock sync.RWMutex + cursor *cursor + lastEvent *common.MapStr + lastResponse *response +} + +func emptyTransformContext() *transformContext { + return &transformContext{ + cursor: &cursor{}, + lastEvent: &common.MapStr{}, + lastResponse: &response{}, + } +} + +func (ctx *transformContext) cursorMap() common.MapStr { + ctx.lock.RLock() + defer ctx.lock.RUnlock() + return ctx.cursor.clone() +} + +func (ctx *transformContext) lastEventClone() *common.MapStr { + ctx.lock.RLock() + defer ctx.lock.RUnlock() + clone := ctx.lastEvent.Clone() + return &clone +} + +func (ctx *transformContext) lastResponseClone() *response { + ctx.lock.RLock() + defer ctx.lock.RUnlock() + return ctx.lastResponse.clone() +} + +func (ctx *transformContext) updateCursor() { + ctx.lock.Lock() + defer ctx.lock.Unlock() + + // we do not want to pass the cursor data to itself + newCtx := emptyTransformContext() 
// NOTE(review): the statements down to the first closing brace are the tail of
// a definition that starts before this chunk; they are reproduced unchanged.
	newCtx.lastEvent = ctx.lastEvent
	newCtx.lastResponse = ctx.lastResponse

	ctx.cursor.update(newCtx)
}

// updateLastEvent stores e in the location ctx.lastEvent points to while
// holding ctx.lock. The pointer is dereferenced unconditionally.
func (ctx *transformContext) updateLastEvent(e common.MapStr) {
	ctx.lock.Lock()
	defer ctx.lock.Unlock()
	*ctx.lastEvent = e
}

// updateLastResponse stores r in the location ctx.lastResponse points to while
// holding ctx.lock. The pointer is dereferenced unconditionally.
func (ctx *transformContext) updateLastResponse(r response) {
	ctx.lock.Lock()
	defer ctx.lock.Unlock()
	*ctx.lastResponse = r
}

// transformable is a common.MapStr whose "header", "body" and "url" keys are
// read and written through the typed accessors below.
type transformable common.MapStr

// access returns tr converted back to a common.MapStr (same underlying map).
func (tr transformable) access() common.MapStr {
	return common.MapStr(tr)
}

// Put stores v under key k. The result and error of the underlying
// common.MapStr.Put are deliberately discarded.
func (tr transformable) Put(k string, v interface{}) {
	_, _ = tr.access().Put(k, v)
}

// GetValue returns the value stored under key k, or the error reported by
// common.MapStr.GetValue.
func (tr transformable) GetValue(k string) (interface{}, error) {
	return tr.access().GetValue(k)
}

// Clone returns the result of common.MapStr.Clone converted to a transformable.
func (tr transformable) Clone() transformable {
	return transformable(tr.access().Clone())
}

// setHeader stores v under the "header" key.
func (tr transformable) setHeader(v http.Header) {
	tr.Put("header", v)
}

// header returns the http.Header stored under "header".
//
// When the key is missing or holds a different type, a new empty http.Header
// is returned; that value is NOT stored back into tr, so changes made to it
// are not visible through tr unless the caller calls setHeader.
func (tr transformable) header() http.Header {
	val, err := tr.GetValue("header")
	if err != nil {
		return http.Header{}
	}

	header, ok := val.(http.Header)
	if !ok {
		return http.Header{}
	}

	return header
}

// setBody stores v under the "body" key.
func (tr transformable) setBody(v common.MapStr) {
	tr.Put("body", v)
}

// body returns the common.MapStr stored under "body".
//
// When the key is missing or holds a different type, a new empty
// common.MapStr is returned; that value is NOT stored back into tr.
func (tr transformable) body() common.MapStr {
	val, err := tr.GetValue("body")
	if err != nil {
		return common.MapStr{}
	}

	body, ok := val.(common.MapStr)
	if !ok {
		return common.MapStr{}
	}

	return body
}

// setURL stores v (by value) under the "url" key.
func (tr transformable) setURL(v url.URL) {
	tr.Put("url", v)
}

// url returns the url.URL stored under "url", or the zero url.URL when the key
// is missing or holds a different type (including *url.URL).
//
// The URL is returned by value, so callers that modify it must call setURL to
// make the change visible through tr.
func (tr transformable) url() url.URL {
	val, err := tr.GetValue("url")
	if err != nil {
		return url.URL{}
	}

	u, ok := val.(url.URL)
	if !ok {
		return url.URL{}
	}

	return u
}

// transform is the minimal interface every transform implements: it reports
// its own name.
type transform interface {
	transformName() string
}

// basicTransform is a transform that can be run against a transformable and
// returns the resulting transformable or an error.
type basicTransform interface {
	transform
	run(*transformContext, transformable) (transformable, error)
}

// maybeMsg carries either a message or an error.
type maybeMsg struct {
	err error
	msg common.MapStr
}

// failed reports whether the maybeMsg holds an error.
func (e maybeMsg) failed() bool { return e.err != nil }

// Error returns the text of the wrapped error. It dereferences e.err without a
// nil check, so it must only be called when failed() is true.
func (e maybeMsg) Error() string { return e.err.Error() }

// newTransformsFromConfig creates a list of transforms from a list of free user configurations.
//
// Every entry of config must contain exactly one field, whose name is the
// action; the constructor registered for (namespace, action) in
// registeredTransforms is called with that field's child config and log.
// The first failure aborts the whole build and is returned.
func newTransformsFromConfig(config transformsConfig, namespace string, log *logp.Logger) (transforms, error) {
	var trans transforms

	for _, tfConfig := range config {
		if len(tfConfig.GetFields()) != 1 {
			return nil, errors.Errorf(
				"each transform must have exactly one action, but found %d actions",
				len(tfConfig.GetFields()),
			)
		}

		actionName := tfConfig.GetFields()[0]
		// NOTE(review): -1 presumably means "not an array element"; verify
		// against common.Config.Child.
		cfg, err := tfConfig.Child(actionName, -1)
		if err != nil {
			return nil, err
		}

		constructor, found := registeredTransforms.get(namespace, actionName)
		if !found {
			return nil, errors.Errorf("the transform %s does not exist. Valid transforms: %s", actionName, registeredTransforms.String())
		}

		cfg.PrintDebugf("Configure transform '%v' with:", actionName)
		transform, err := constructor(cfg, log)
		if err != nil {
			return nil, err
		}

		trans = append(trans, transform)
	}

	return trans, nil
}

// newBasicTransformsFromConfig builds the transforms with
// newTransformsFromConfig and requires each of them to implement
// basicTransform; the first one that does not is reported as an error.
func newBasicTransformsFromConfig(config transformsConfig, namespace string, log *logp.Logger) ([]basicTransform, error) {
	ts, err := newTransformsFromConfig(config, namespace, log)
	if err != nil {
		return nil, err
	}

	var rts []basicTransform
	for _, t := range ts {
		rt, ok := t.(basicTransform)
		if !ok {
			return nil, fmt.Errorf("transform %s is not a valid %s transform", t.transformName(), namespace)
		}
		rts = append(rts, rt)
	}

	return rts, nil
}
Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package v2 + +import ( + "fmt" + + "github.com/pkg/errors" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +const appendName = "append" + +type appendConfig struct { + Target string `config:"target"` + Value *valueTpl `config:"value"` + Default *valueTpl `config:"default"` +} + +type appendt struct { + log *logp.Logger + targetInfo targetInfo + value *valueTpl + defaultValue *valueTpl + + runFunc func(ctx *transformContext, transformable transformable, key, val string) error +} + +func (appendt) transformName() string { return appendName } + +func newAppendRequest(cfg *common.Config, log *logp.Logger) (transform, error) { + append, err := newAppend(cfg, log) + if err != nil { + return nil, err + } + + switch append.targetInfo.Type { + case targetBody: + append.runFunc = appendBody + case targetHeader: + append.runFunc = appendHeader + case targetURLParams: + append.runFunc = appendURLParams + default: + return nil, fmt.Errorf("invalid target type: %s", append.targetInfo.Type) + } + + return &append, nil +} + +func newAppendResponse(cfg *common.Config, log *logp.Logger) (transform, error) { + append, err := newAppend(cfg, log) + if err != nil { + return nil, err + } + + switch append.targetInfo.Type { + case targetBody: + append.runFunc = appendBody + default: + return nil, fmt.Errorf("invalid target type: %s", append.targetInfo.Type) + } + + return &append, nil +} + +func newAppendPagination(cfg *common.Config, log *logp.Logger) (transform, error) { + append, err := newAppend(cfg, log) + if err != nil { + return nil, err + } + + switch append.targetInfo.Type { + case targetBody: + append.runFunc = appendBody + case targetHeader: + append.runFunc = appendHeader + case targetURLParams: + append.runFunc = appendURLParams + default: + return nil, fmt.Errorf("invalid target type: %s", 
append.targetInfo.Type) + } + + return &append, nil +} + +func newAppend(cfg *common.Config, log *logp.Logger) (appendt, error) { + c := &appendConfig{} + if err := cfg.Unpack(c); err != nil { + return appendt{}, errors.Wrap(err, "fail to unpack the append configuration") + } + + ti, err := getTargetInfo(c.Target) + if err != nil { + return appendt{}, err + } + + return appendt{ + log: log, + targetInfo: ti, + value: c.Value, + defaultValue: c.Default, + }, nil +} + +func (append *appendt) run(ctx *transformContext, tr transformable) (transformable, error) { + value := append.value.Execute(ctx, tr, append.defaultValue, append.log) + if err := append.runFunc(ctx, tr, append.targetInfo.Name, value); err != nil { + return transformable{}, err + } + return tr, nil +} + +func appendToCommonMap(m common.MapStr, key, val string) error { + if val == "" { + return nil + } + var value interface{} = val + if found, _ := m.HasKey(key); found { + prev, _ := m.GetValue(key) + switch t := prev.(type) { + case []string: + value = append(t, val) + case []interface{}: + value = append(t, val) + default: + value = []interface{}{prev, val} + } + + } + if _, err := m.Put(key, value); err != nil { + return err + } + return nil +} + +func appendBody(ctx *transformContext, transformable transformable, key, value string) error { + return appendToCommonMap(transformable.body(), key, value) +} + +func appendHeader(ctx *transformContext, transformable transformable, key, value string) error { + if value == "" { + return nil + } + transformable.header().Add(key, value) + return nil +} + +func appendURLParams(ctx *transformContext, transformable transformable, key, value string) error { + if value == "" { + return nil + } + url := transformable.url() + q := url.Query() + q.Add(key, value) + url.RawQuery = q.Encode() + transformable.setURL(url) + return nil +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/transform_append_test.go 
b/x-pack/filebeat/input/httpjson/internal/v2/transform_append_test.go new file mode 100644 index 000000000000..12b49af4395f --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/transform_append_test.go @@ -0,0 +1,186 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package v2 + +import ( + "net/http" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/elastic/beats/v7/libbeat/common" +) + +func TestNewAppend(t *testing.T) { + cases := []struct { + name string + constructor constructor + config map[string]interface{} + expectedTarget targetInfo + expectedErr string + }{ + { + name: "newAppendResponse targets body", + constructor: newAppendResponse, + config: map[string]interface{}{ + "target": "body.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "body"}, + }, + { + name: "newAppendResponse targets something else", + constructor: newAppendResponse, + config: map[string]interface{}{ + "target": "cursor.foo", + }, + expectedErr: "invalid target: cursor.foo", + }, + { + name: "newAppendRequest targets body", + constructor: newAppendRequest, + config: map[string]interface{}{ + "target": "body.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "body"}, + }, + { + name: "newAppendRequest targets header", + constructor: newAppendRequest, + config: map[string]interface{}{ + "target": "header.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "header"}, + }, + { + name: "newAppendRequest targets url param", + constructor: newAppendRequest, + config: map[string]interface{}{ + "target": "url.params.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "url.params"}, + }, + { + name: "newAppendRequest targets something else", + constructor: newAppendRequest, + config: map[string]interface{}{ + "target": "cursor.foo", + }, + 
expectedErr: "invalid target: cursor.foo", + }, + { + name: "newAppendPagination targets body", + constructor: newAppendPagination, + config: map[string]interface{}{ + "target": "body.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "body"}, + }, + { + name: "newAppendPagination targets header", + constructor: newAppendPagination, + config: map[string]interface{}{ + "target": "header.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "header"}, + }, + { + name: "newAppendPagination targets url param", + constructor: newAppendPagination, + config: map[string]interface{}{ + "target": "url.params.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "url.params"}, + }, + { + name: "newAppendPagination targets url value", + constructor: newAppendPagination, + config: map[string]interface{}{ + "target": "url.value", + }, + expectedErr: "invalid target type: url.value", + }, + { + name: "newAppendPagination targets something else", + constructor: newAppendPagination, + config: map[string]interface{}{ + "target": "cursor.foo", + }, + expectedErr: "invalid target: cursor.foo", + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + cfg := common.MustNewConfigFrom(tc.config) + gotAppend, gotErr := tc.constructor(cfg, nil) + if tc.expectedErr == "" { + assert.NoError(t, gotErr) + assert.Equal(t, tc.expectedTarget, (gotAppend.(*appendt)).targetInfo) + } else { + assert.EqualError(t, gotErr, tc.expectedErr) + } + }) + } +} + +func TestAppendFunctions(t *testing.T) { + cases := []struct { + name string + tfunc func(ctx *transformContext, transformable transformable, key, val string) error + paramCtx *transformContext + paramTr transformable + paramKey string + paramVal string + expectedTr transformable + expectedErr error + }{ + { + name: "appendBody", + tfunc: appendBody, + paramCtx: &transformContext{}, + paramTr: transformable{"body": common.MapStr{"a_key": "a_value"}}, + paramKey: "a_key", + paramVal: 
"another_value", + expectedTr: transformable{"body": common.MapStr{"a_key": []interface{}{"a_value", "another_value"}}}, + expectedErr: nil, + }, + { + name: "appendHeader", + tfunc: appendHeader, + paramCtx: &transformContext{}, + paramTr: transformable{"header": http.Header{ + "A_key": []string{"a_value"}, + }}, + paramKey: "a_key", + paramVal: "another_value", + expectedTr: transformable{"header": http.Header{"A_key": []string{"a_value", "another_value"}}}, + expectedErr: nil, + }, + { + name: "appendURLParams", + tfunc: appendURLParams, + paramCtx: &transformContext{}, + paramTr: transformable{"url": newURL("http://foo.example.com?a_key=a_value")}, + paramKey: "a_key", + paramVal: "another_value", + expectedTr: transformable{"url": newURL("http://foo.example.com?a_key=a_value&a_key=another_value")}, + expectedErr: nil, + }, + } + + for _, tcase := range cases { + tcase := tcase + t.Run(tcase.name, func(t *testing.T) { + gotErr := tcase.tfunc(tcase.paramCtx, tcase.paramTr, tcase.paramKey, tcase.paramVal) + if tcase.expectedErr == nil { + assert.NoError(t, gotErr) + } else { + assert.EqualError(t, gotErr, tcase.expectedErr.Error()) + } + assert.EqualValues(t, tcase.expectedTr, tcase.paramTr) + }) + } +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/transform_delete.go b/x-pack/filebeat/input/httpjson/internal/v2/transform_delete.go new file mode 100644 index 000000000000..c8c54b8141ed --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/transform_delete.go @@ -0,0 +1,132 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package v2 + +import ( + "fmt" + + "github.com/pkg/errors" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +const deleteName = "delete" + +type deleteConfig struct { + Target string `config:"target"` +} + +type delete struct { + targetInfo targetInfo + + runFunc func(ctx *transformContext, transformable transformable, key string) error +} + +func (delete) transformName() string { return deleteName } + +func newDeleteRequest(cfg *common.Config, _ *logp.Logger) (transform, error) { + delete, err := newDelete(cfg) + if err != nil { + return nil, err + } + + switch delete.targetInfo.Type { + case targetBody: + delete.runFunc = deleteBody + case targetHeader: + delete.runFunc = deleteHeader + case targetURLParams: + delete.runFunc = deleteURLParams + default: + return nil, fmt.Errorf("invalid target type: %s", delete.targetInfo.Type) + } + + return &delete, nil +} + +func newDeleteResponse(cfg *common.Config, _ *logp.Logger) (transform, error) { + delete, err := newDelete(cfg) + if err != nil { + return nil, err + } + + switch delete.targetInfo.Type { + case targetBody: + delete.runFunc = deleteBody + default: + return nil, fmt.Errorf("invalid target type: %s", delete.targetInfo.Type) + } + + return &delete, nil +} + +func newDeletePagination(cfg *common.Config, _ *logp.Logger) (transform, error) { + delete, err := newDelete(cfg) + if err != nil { + return nil, err + } + + switch delete.targetInfo.Type { + case targetBody: + delete.runFunc = deleteBody + case targetHeader: + delete.runFunc = deleteHeader + case targetURLParams: + delete.runFunc = deleteURLParams + default: + return nil, fmt.Errorf("invalid target type: %s", delete.targetInfo.Type) + } + + return &delete, nil +} + +func newDelete(cfg *common.Config) (delete, error) { + c := &deleteConfig{} + if err := cfg.Unpack(c); err != nil { + return delete{}, errors.Wrap(err, "fail to unpack the delete configuration") + } + + ti, err := getTargetInfo(c.Target) + 
if err != nil { + return delete{}, err + } + + return delete{ + targetInfo: ti, + }, nil +} + +func (delete *delete) run(ctx *transformContext, tr transformable) (transformable, error) { + if err := delete.runFunc(ctx, tr, delete.targetInfo.Name); err != nil { + return transformable{}, err + } + return tr, nil +} + +func deleteFromCommonMap(m common.MapStr, key string) error { + if err := m.Delete(key); err != common.ErrKeyNotFound { + return err + } + return nil +} + +func deleteBody(ctx *transformContext, transformable transformable, key string) error { + return deleteFromCommonMap(transformable.body(), key) +} + +func deleteHeader(ctx *transformContext, transformable transformable, key string) error { + transformable.header().Del(key) + return nil +} + +func deleteURLParams(ctx *transformContext, transformable transformable, key string) error { + url := transformable.url() + q := url.Query() + q.Del(key) + url.RawQuery = q.Encode() + transformable.setURL(url) + return nil +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/transform_delete_test.go b/x-pack/filebeat/input/httpjson/internal/v2/transform_delete_test.go new file mode 100644 index 000000000000..22cbce310f97 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/transform_delete_test.go @@ -0,0 +1,182 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package v2 + +import ( + "net/http" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/elastic/beats/v7/libbeat/common" +) + +func TestNewDelete(t *testing.T) { + cases := []struct { + name string + constructor constructor + config map[string]interface{} + expectedTarget targetInfo + expectedErr string + }{ + { + name: "newDeleteResponse targets body", + constructor: newDeleteResponse, + config: map[string]interface{}{ + "target": "body.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "body"}, + }, + { + name: "newDeleteResponse targets something else", + constructor: newDeleteResponse, + config: map[string]interface{}{ + "target": "cursor.foo", + }, + expectedErr: "invalid target: cursor.foo", + }, + { + name: "newDeleteRequest targets body", + constructor: newDeleteRequest, + config: map[string]interface{}{ + "target": "body.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "body"}, + }, + { + name: "newDeleteRequest targets header", + constructor: newDeleteRequest, + config: map[string]interface{}{ + "target": "header.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "header"}, + }, + { + name: "newDeleteRequest targets url param", + constructor: newDeleteRequest, + config: map[string]interface{}{ + "target": "url.params.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "url.params"}, + }, + { + name: "newDeleteRequest targets something else", + constructor: newDeleteRequest, + config: map[string]interface{}{ + "target": "cursor.foo", + }, + expectedErr: "invalid target: cursor.foo", + }, + { + name: "newDeletePagination targets body", + constructor: newDeletePagination, + config: map[string]interface{}{ + "target": "body.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "body"}, + }, + { + name: "newDeletePagination targets header", + constructor: newDeletePagination, + config: map[string]interface{}{ + "target": "header.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "header"}, + }, 
+ { + name: "newDeletePagination targets url param", + constructor: newDeletePagination, + config: map[string]interface{}{ + "target": "url.params.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "url.params"}, + }, + { + name: "newDeletePagination targets url value", + constructor: newDeletePagination, + config: map[string]interface{}{ + "target": "url.value", + }, + expectedErr: "invalid target type: url.value", + }, + { + name: "newDeletePagination targets something else", + constructor: newDeletePagination, + config: map[string]interface{}{ + "target": "cursor.foo", + }, + expectedErr: "invalid target: cursor.foo", + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + cfg := common.MustNewConfigFrom(tc.config) + gotDelete, gotErr := tc.constructor(cfg, nil) + if tc.expectedErr == "" { + assert.NoError(t, gotErr) + assert.Equal(t, tc.expectedTarget, (gotDelete.(*delete)).targetInfo) + } else { + assert.EqualError(t, gotErr, tc.expectedErr) + } + }) + } +} + +func TestDeleteFunctions(t *testing.T) { + cases := []struct { + name string + tfunc func(ctx *transformContext, transformable transformable, key string) error + paramCtx *transformContext + paramTr transformable + paramKey string + expectedTr transformable + expectedErr error + }{ + { + name: "deleteBody", + tfunc: deleteBody, + paramCtx: &transformContext{}, + paramTr: transformable{"body": common.MapStr{"a_key": "a_value"}}, + paramKey: "a_key", + expectedTr: transformable{"body": common.MapStr{}}, + expectedErr: nil, + }, + { + name: "deleteHeader", + tfunc: deleteHeader, + paramCtx: &transformContext{}, + paramTr: transformable{"header": http.Header{ + "A_key": []string{"a_value"}, + }}, + paramKey: "a_key", + expectedTr: transformable{"header": http.Header{}}, + expectedErr: nil, + }, + { + name: "deleteURLParams", + tfunc: deleteURLParams, + paramCtx: &transformContext{}, + paramTr: transformable{"url": newURL("http://foo.example.com?a_key=a_value")}, + 
paramKey: "a_key", + expectedTr: transformable{"url": newURL("http://foo.example.com")}, + expectedErr: nil, + }, + } + + for _, tcase := range cases { + tcase := tcase + t.Run(tcase.name, func(t *testing.T) { + gotErr := tcase.tfunc(tcase.paramCtx, tcase.paramTr, tcase.paramKey) + if tcase.expectedErr == nil { + assert.NoError(t, gotErr) + } else { + assert.EqualError(t, gotErr, tcase.expectedErr.Error()) + } + assert.EqualValues(t, tcase.expectedTr, tcase.paramTr) + }) + } +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/transform_registry.go b/x-pack/filebeat/input/httpjson/internal/v2/transform_registry.go new file mode 100644 index 000000000000..f0073f292771 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/transform_registry.go @@ -0,0 +1,81 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package v2 + +import ( + "errors" + "fmt" + "strings" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +type constructor func(config *common.Config, log *logp.Logger) (transform, error) + +var registeredTransforms = newRegistry() + +type registry struct { + namespaces map[string]map[string]constructor +} + +func newRegistry() *registry { + return ®istry{namespaces: make(map[string]map[string]constructor)} +} + +func (reg *registry) register(namespace, transform string, cons constructor) error { + if cons == nil { + return errors.New("constructor can't be nil") + } + + m, found := reg.namespaces[namespace] + if !found { + reg.namespaces[namespace] = make(map[string]constructor) + m = reg.namespaces[namespace] + } + + if _, found := m[transform]; found { + return errors.New("already registered") + } + + m[transform] = cons + + return nil +} + +func (reg registry) String() string { + if len(reg.namespaces) == 0 { + return "(empty registry)" + } + + var str string + for namespace, m := range reg.namespaces { + var names []string + for k := range m { + names = append(names, k) + } + str += fmt.Sprintf("%s: (%s)\n", namespace, strings.Join(names, ", ")) + } + + return str +} + +func (reg registry) get(namespace, transform string) (constructor, bool) { + m, found := reg.namespaces[namespace] + if !found { + return nil, false + } + c, found := m[transform] + return c, found +} + +func registerTransform(namespace, transform string, constructor constructor) { + logp.L().Named(logName).Debugf("Register transform %s:%s", namespace, transform) + + err := registeredTransforms.register(namespace, transform, constructor) + if err != nil { + panic(err) + } +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/transform_set.go b/x-pack/filebeat/input/httpjson/internal/v2/transform_set.go new file mode 100644 index 000000000000..fcdb1fbbb391 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/transform_set.go @@ 
-0,0 +1,169 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package v2 + +import ( + "fmt" + "net/url" + + "github.com/pkg/errors" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +var errNewURLValueNotSet = errors.New("the new url.value was not set") + +const setName = "set" + +type setConfig struct { + Target string `config:"target"` + Value *valueTpl `config:"value"` + Default *valueTpl `config:"default"` +} + +type set struct { + log *logp.Logger + targetInfo targetInfo + value *valueTpl + defaultValue *valueTpl + + runFunc func(ctx *transformContext, transformable transformable, key, val string) error +} + +func (set) transformName() string { return setName } + +func newSetRequest(cfg *common.Config, log *logp.Logger) (transform, error) { + set, err := newSet(cfg, log) + if err != nil { + return nil, err + } + + switch set.targetInfo.Type { + case targetBody: + set.runFunc = setBody + case targetHeader: + set.runFunc = setHeader + case targetURLParams: + set.runFunc = setURLParams + default: + return nil, fmt.Errorf("invalid target type: %s", set.targetInfo.Type) + } + + return &set, nil +} + +func newSetResponse(cfg *common.Config, log *logp.Logger) (transform, error) { + set, err := newSet(cfg, log) + if err != nil { + return nil, err + } + + switch set.targetInfo.Type { + case targetBody: + set.runFunc = setBody + default: + return nil, fmt.Errorf("invalid target type: %s", set.targetInfo.Type) + } + + return &set, nil +} + +func newSetPagination(cfg *common.Config, log *logp.Logger) (transform, error) { + set, err := newSet(cfg, log) + if err != nil { + return nil, err + } + + switch set.targetInfo.Type { + case targetBody: + set.runFunc = setBody + case targetHeader: + set.runFunc = setHeader + case 
targetURLParams: + set.runFunc = setURLParams + case targetURLValue: + set.runFunc = setURLValue + default: + return nil, fmt.Errorf("invalid target type: %s", set.targetInfo.Type) + } + + return &set, nil +} + +func newSet(cfg *common.Config, log *logp.Logger) (set, error) { + c := &setConfig{} + if err := cfg.Unpack(c); err != nil { + return set{}, errors.Wrap(err, "fail to unpack the set configuration") + } + + ti, err := getTargetInfo(c.Target) + if err != nil { + return set{}, err + } + + return set{ + log: log, + targetInfo: ti, + value: c.Value, + defaultValue: c.Default, + }, nil +} + +func (set *set) run(ctx *transformContext, tr transformable) (transformable, error) { + value := set.value.Execute(ctx, tr, set.defaultValue, set.log) + if err := set.runFunc(ctx, tr, set.targetInfo.Name, value); err != nil { + return transformable{}, err + } + return tr, nil +} + +func setToCommonMap(m common.MapStr, key, val string) error { + if val == "" { + return nil + } + if _, err := m.Put(key, val); err != nil { + return err + } + return nil +} + +func setBody(ctx *transformContext, transformable transformable, key, value string) error { + return setToCommonMap(transformable.body(), key, value) +} + +func setHeader(ctx *transformContext, transformable transformable, key, value string) error { + if value == "" { + return nil + } + transformable.header().Add(key, value) + return nil +} + +func setURLParams(ctx *transformContext, transformable transformable, key, value string) error { + if value == "" { + return nil + } + url := transformable.url() + q := url.Query() + q.Set(key, value) + url.RawQuery = q.Encode() + transformable.setURL(url) + return nil +} + +func setURLValue(ctx *transformContext, transformable transformable, _, value string) error { + // if the template processing did not find any value + // we fail without parsing + if value == "" || value == "" { + return errNewURLValueNotSet + } + url, err := url.Parse(value) + if err != nil { + return 
errNewURLValueNotSet + } + transformable.setURL(*url) + return nil +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/transform_set_test.go b/x-pack/filebeat/input/httpjson/internal/v2/transform_set_test.go new file mode 100644 index 000000000000..e6afd3dae073 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/transform_set_test.go @@ -0,0 +1,199 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package v2 + +import ( + "net/http" + "net/url" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/elastic/beats/v7/libbeat/common" +) + +func TestNewSet(t *testing.T) { + cases := []struct { + name string + constructor constructor + config map[string]interface{} + expectedTarget targetInfo + expectedErr string + }{ + { + name: "newSetResponse targets body", + constructor: newSetResponse, + config: map[string]interface{}{ + "target": "body.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "body"}, + }, + { + name: "newSetResponse targets something else", + constructor: newSetResponse, + config: map[string]interface{}{ + "target": "cursor.foo", + }, + expectedErr: "invalid target: cursor.foo", + }, + { + name: "newSetRequest targets body", + constructor: newSetRequest, + config: map[string]interface{}{ + "target": "body.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "body"}, + }, + { + name: "newSetRequest targets header", + constructor: newSetRequest, + config: map[string]interface{}{ + "target": "header.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "header"}, + }, + { + name: "newSetRequest targets url param", + constructor: newSetRequest, + config: map[string]interface{}{ + "target": "url.params.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "url.params"}, + }, + { + name: "newSetRequest targets 
something else", + constructor: newSetRequest, + config: map[string]interface{}{ + "target": "cursor.foo", + }, + expectedErr: "invalid target: cursor.foo", + }, + { + name: "newSetPagination targets body", + constructor: newSetPagination, + config: map[string]interface{}{ + "target": "body.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "body"}, + }, + { + name: "newSetPagination targets header", + constructor: newSetPagination, + config: map[string]interface{}{ + "target": "header.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "header"}, + }, + { + name: "newSetPagination targets url param", + constructor: newSetPagination, + config: map[string]interface{}{ + "target": "url.params.foo", + }, + expectedTarget: targetInfo{Name: "foo", Type: "url.params"}, + }, + { + name: "newSetPagination targets url value", + constructor: newSetPagination, + config: map[string]interface{}{ + "target": "url.value", + }, + expectedTarget: targetInfo{Type: "url.value"}, + }, + { + name: "newSetPagination targets something else", + constructor: newSetPagination, + config: map[string]interface{}{ + "target": "cursor.foo", + }, + expectedErr: "invalid target: cursor.foo", + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + cfg := common.MustNewConfigFrom(tc.config) + gotSet, gotErr := tc.constructor(cfg, nil) + if tc.expectedErr == "" { + assert.NoError(t, gotErr) + assert.Equal(t, tc.expectedTarget, (gotSet.(*set)).targetInfo) + } else { + assert.EqualError(t, gotErr, tc.expectedErr) + } + }) + } +} + +func TestSetFunctions(t *testing.T) { + cases := []struct { + name string + tfunc func(ctx *transformContext, transformable transformable, key, val string) error + paramCtx *transformContext + paramTr transformable + paramKey string + paramVal string + expectedTr transformable + expectedErr error + }{ + { + name: "setBody", + tfunc: setBody, + paramCtx: &transformContext{}, + paramTr: transformable{"body": common.MapStr{}}, + 
paramKey: "a_key", + paramVal: "a_value", + expectedTr: transformable{"body": common.MapStr{"a_key": "a_value"}}, + expectedErr: nil, + }, + { + name: "setHeader", + tfunc: setHeader, + paramCtx: &transformContext{}, + paramTr: transformable{"header": http.Header{}}, + paramKey: "a_key", + paramVal: "a_value", + expectedTr: transformable{"header": http.Header{"A_key": []string{"a_value"}}}, + expectedErr: nil, + }, + { + name: "setURLParams", + tfunc: setURLParams, + paramCtx: &transformContext{}, + paramTr: transformable{"url": newURL("http://foo.example.com")}, + paramKey: "a_key", + paramVal: "a_value", + expectedTr: transformable{"url": newURL("http://foo.example.com?a_key=a_value")}, + expectedErr: nil, + }, + { + name: "setURLValue", + tfunc: setURLValue, + paramCtx: &transformContext{}, + paramTr: transformable{"url": newURL("http://foo.example.com")}, + paramVal: "http://different.example.com", + expectedTr: transformable{"url": newURL("http://different.example.com")}, + expectedErr: nil, + }, + } + + for _, tcase := range cases { + tcase := tcase + t.Run(tcase.name, func(t *testing.T) { + gotErr := tcase.tfunc(tcase.paramCtx, tcase.paramTr, tcase.paramKey, tcase.paramVal) + if tcase.expectedErr == nil { + assert.NoError(t, gotErr) + } else { + assert.EqualError(t, gotErr, tcase.expectedErr.Error()) + } + assert.EqualValues(t, tcase.expectedTr, tcase.paramTr) + }) + } +} + +func newURL(u string) url.URL { + url, _ := url.Parse(u) + return *url +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/transform_target.go b/x-pack/filebeat/input/httpjson/internal/v2/transform_target.go new file mode 100644 index 000000000000..2fd6d83d3c09 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/transform_target.go @@ -0,0 +1,66 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. 
// targetType identifies the part of a request or response that a transform
// writes to.
type targetType string

// Supported transform targets.
const (
	targetBody      targetType = "body"
	targetHeader    targetType = "header"
	targetURLValue  targetType = "url.value"
	targetURLParams targetType = "url.params"
)

// errInvalidTarget is returned when a configured target string does not map
// to any supported target.
type errInvalidTarget struct {
	target string
}

// Error implements the error interface.
func (err errInvalidTarget) Error() string {
	return fmt.Sprintf("invalid target: %s", err.target)
}

// targetInfo is the parsed form of a target string: the kind of target and,
// for every kind except url.value, the name addressed within it.
type targetInfo struct {
	Type targetType
	Name string
}

// getTargetInfo parses a dotted target string.
//
// Accepted forms are "url.value", "url.params.<name>", "header.<name>" and
// "body.<name>". For header and body targets everything after the first dot
// is the name, so "body.foo.bar" yields the name "foo.bar". Any other input,
// including a target whose name is empty (for example "header."), yields an
// errInvalidTarget carrying the original string.
func getTargetInfo(t string) (targetInfo, error) {
	invalid := func() (targetInfo, error) {
		return targetInfo{}, errInvalidTarget{t}
	}

	parts := strings.SplitN(t, ".", 2)
	if len(parts) < 2 {
		return invalid()
	}
	kind, rest := parts[0], parts[1]

	switch kind {
	case "url":
		if rest == "value" {
			return targetInfo{Type: targetURLValue}, nil
		}

		paramParts := strings.SplitN(rest, ".", 2)
		if len(paramParts) < 2 || paramParts[0] != "params" {
			return invalid()
		}
		// An empty parameter name cannot address anything in the query string.
		if paramParts[1] == "" {
			return invalid()
		}
		return targetInfo{Type: targetURLParams, Name: paramParts[1]}, nil
	case "header":
		// A trailing dot with nothing after it would set a nameless header.
		if rest == "" {
			return invalid()
		}
		return targetInfo{Type: targetHeader, Name: rest}, nil
	case "body":
		if rest == "" {
			return invalid()
		}
		return targetInfo{Type: targetBody, Name: rest}, nil
	}
	return invalid()
}
+ +package v2 + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestGetTargetInfo(t *testing.T) { + cases := []struct { + name string + param string + expected targetInfo + expectedErr string + }{ + { + name: "valid url.value", + param: "url.value", + expected: targetInfo{Type: "url.value"}, + }, + { + name: "invalid url.value", + param: "url.value.something", + expectedErr: "invalid target: url.value.something", + }, + { + name: "valid url.params", + param: "url.params.foo", + expected: targetInfo{Type: "url.params", Name: "foo"}, + }, + { + name: "invalid url.params", + param: "url.params", + expectedErr: "invalid target: url.params", + }, + { + name: "valid header", + param: "header.foo", + expected: targetInfo{Type: "header", Name: "foo"}, + }, + { + name: "valid body", + param: "body.foo.bar", + expected: targetInfo{Type: "body", Name: "foo.bar"}, + }, + { + name: "invalid target: missing part", + param: "header", + expectedErr: "invalid target: header", + }, + { + name: "invalid target: unknown", + param: "unknown.foo", + expectedErr: "invalid target: unknown.foo", + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + got, gotErr := getTargetInfo(tc.param) + if tc.expectedErr == "" { + assert.NoError(t, gotErr) + assert.Equal(t, tc.expected, got) + } else { + assert.EqualError(t, gotErr, tc.expectedErr) + } + }) + } +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/transform_test.go b/x-pack/filebeat/input/httpjson/internal/v2/transform_test.go new file mode 100644 index 000000000000..6336330cd903 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/transform_test.go @@ -0,0 +1,168 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package v2 + +import ( + "net/http" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +func TestEmptyTransformContext(t *testing.T) { + ctx := emptyTransformContext() + assert.Equal(t, &cursor{}, ctx.cursor) + assert.Equal(t, &common.MapStr{}, ctx.lastEvent) + assert.Equal(t, &response{}, ctx.lastResponse) +} + +func TestEmptyTransformable(t *testing.T) { + tr := transformable{} + assert.Equal(t, common.MapStr{}, tr.body()) + assert.Equal(t, http.Header{}, tr.header()) +} + +func TestTransformableNilClone(t *testing.T) { + var tr transformable + cl := tr.Clone() + assert.Equal(t, common.MapStr{}, cl.body()) + assert.Equal(t, http.Header{}, cl.header()) +} + +func TestTransformableClone(t *testing.T) { + tr := transformable{} + body := tr.body() + _, _ = body.Put("key", "value") + tr.setBody(body) + cl := tr.Clone() + assert.Equal(t, common.MapStr{"key": "value"}, cl.body()) + assert.Equal(t, http.Header{}, cl.header()) +} + +func TestNewTransformsFromConfig(t *testing.T) { + registerTransform("test", setName, newSetRequest) + t.Cleanup(func() { registeredTransforms = newRegistry() }) + + cases := []struct { + name string + paramCfg map[string]interface{} + paramNamespace string + expectedTransforms transforms + expectedErr string + }{ + { + name: "fails if config has more than one action", + paramCfg: map[string]interface{}{ + "set": nil, + "set2": nil, + }, + expectedErr: "each transform must have exactly one action, but found 2 actions", + }, + { + name: "fails if not found in namespace", + paramCfg: map[string]interface{}{ + "set": nil, + }, + paramNamespace: "empty", + expectedErr: "the transform set does not exist. 
Valid transforms: test: (set)\n", + }, + { + name: "fails if constructor fails", + paramCfg: map[string]interface{}{ + "set": map[string]interface{}{ + "target": "invalid", + }, + }, + paramNamespace: "test", + expectedErr: "invalid target: invalid", + }, + { + name: "transform is correct", + paramCfg: map[string]interface{}{ + "set": map[string]interface{}{ + "target": "body.foo", + }, + }, + paramNamespace: "test", + expectedTransforms: transforms{ + &set{ + targetInfo: targetInfo{Name: "foo", Type: "body"}, + }, + }, + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + cfg := common.MustNewConfigFrom(tc.paramCfg) + gotTransforms, gotErr := newTransformsFromConfig(transformsConfig{cfg}, tc.paramNamespace, nil) + if tc.expectedErr == "" { + assert.NoError(t, gotErr) + tr := gotTransforms[0].(*set) + tr.runFunc = nil // we do not want to check func pointer + assert.EqualValues(t, tc.expectedTransforms, gotTransforms) + } else { + assert.EqualError(t, gotErr, tc.expectedErr) + } + }) + } +} + +type fakeTransform struct{} + +func (fakeTransform) transformName() string { return "fake" } + +func TestNewBasicTransformsFromConfig(t *testing.T) { + fakeConstr := func(*common.Config, *logp.Logger) (transform, error) { + + return fakeTransform{}, nil + } + + registerTransform("test", setName, newSetRequest) + registerTransform("test", "fake", fakeConstr) + t.Cleanup(func() { registeredTransforms = newRegistry() }) + + cases := []struct { + name string + paramCfg map[string]interface{} + paramNamespace string + expectedErr string + }{ + { + name: "succeeds if transform is basicTransform", + paramCfg: map[string]interface{}{ + "set": map[string]interface{}{ + "target": "body.foo", + }, + }, + paramNamespace: "test", + }, + { + name: "fails if transform is not a basicTransform", + paramCfg: map[string]interface{}{ + "fake": nil, + }, + paramNamespace: "test", + expectedErr: "transform fake is not a valid test transform", + }, + } + + for 
_, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + cfg := common.MustNewConfigFrom(tc.paramCfg) + _, gotErr := newBasicTransformsFromConfig(transformsConfig{cfg}, tc.paramNamespace, nil) + if tc.expectedErr == "" { + assert.NoError(t, gotErr) + } else { + assert.EqualError(t, gotErr, tc.expectedErr) + } + }) + } +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/value_tpl.go b/x-pack/filebeat/input/httpjson/internal/v2/value_tpl.go new file mode 100644 index 000000000000..93a84bcc4b31 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/value_tpl.go @@ -0,0 +1,201 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package v2 + +import ( + "bytes" + "regexp" + "strconv" + "strings" + "text/template" + "time" + + "github.com/elastic/beats/v7/libbeat/logp" +) + +type valueTpl struct { + *template.Template +} + +func (t *valueTpl) Unpack(in string) error { + tpl, err := template.New(""). + Option("missingkey=error"). + Funcs(template.FuncMap{ + "now": now, + "parseDate": parseDate, + "formatDate": formatDate, + "parseDuration": parseDuration, + "parseTimestamp": parseTimestamp, + "parseTimestampMilli": parseTimestampMilli, + "parseTimestampNano": parseTimestampNano, + "getRFC5988Link": getRFC5988Link, + "toInt": toInt, + "add": add, + }). 
+ Parse(in) + if err != nil { + return err + } + + *t = valueTpl{Template: tpl} + + return nil +} + +func (t *valueTpl) Execute(trCtx *transformContext, tr transformable, defaultVal *valueTpl, log *logp.Logger) (val string) { + fallback := func(err error) string { + if err != nil { + log.Debugf("template execution failed: %v", err) + } + if defaultVal != nil { + log.Debugf("template execution: falling back to default value") + return defaultVal.Execute(emptyTransformContext(), transformable{}, nil, log) + } + return "" + } + + defer func() { + if r := recover(); r != nil { + val = fallback(r.(error)) + } + log.Debugf("template execution: evaluated template %q", val) + }() + + buf := new(bytes.Buffer) + data := tr.Clone() + data.Put("cursor", trCtx.cursorMap()) + data.Put("last_event", trCtx.lastEventClone()) + data.Put("last_response", trCtx.lastResponseClone().templateValues()) + + if err := t.Template.Execute(buf, data); err != nil { + return fallback(err) + } + + val = buf.String() + if val == "" || strings.Contains(val, "") { + return fallback(nil) + } + return val +} + +var ( + predefinedLayouts = map[string]string{ + "ANSIC": time.ANSIC, + "UnixDate": time.UnixDate, + "RubyDate": time.RubyDate, + "RFC822": time.RFC822, + "RFC822Z": time.RFC822Z, + "RFC850": time.RFC850, + "RFC1123": time.RFC1123, + "RFC1123Z": time.RFC1123Z, + "RFC3339": time.RFC3339, + "RFC3339Nano": time.RFC3339Nano, + "Kitchen": time.Kitchen, + } +) + +func now(add ...time.Duration) time.Time { + now := timeNow().UTC() + if len(add) == 0 { + return now + } + return now.Add(add[0]) +} + +func parseDuration(s string) time.Duration { + d, _ := time.ParseDuration(s) + return d +} + +func parseDate(date string, layout ...string) time.Time { + var ly string + if len(layout) == 0 { + ly = "RFC3339" + } else { + ly = layout[0] + } + if found := predefinedLayouts[ly]; found != "" { + ly = found + } + + t, err := time.Parse(ly, date) + if err != nil { + return time.Time{} + } + + return t.UTC() +} + 
+func formatDate(date time.Time, layouttz ...string) string { + var layout, tz string + switch { + case len(layouttz) == 0: + layout = "RFC3339" + case len(layouttz) == 1: + layout = layouttz[0] + case len(layouttz) > 1: + layout, tz = layouttz[0], layouttz[1] + } + + if found := predefinedLayouts[layout]; found != "" { + layout = found + } + + if loc, err := time.LoadLocation(tz); err == nil { + date = date.In(loc) + } else { + date = date.UTC() + } + + return date.Format(layout) +} + +func parseTimestamp(s int64) time.Time { + return time.Unix(s, 0).UTC() +} + +func parseTimestampMilli(ms int64) time.Time { + return time.Unix(0, ms*1e6).UTC() +} + +func parseTimestampNano(ns int64) time.Time { + return time.Unix(0, ns).UTC() +} + +var regexpLinkRel = regexp.MustCompile(`<(.*)>;.*\srel\="?([^;"]*)`) + +func getRFC5988Link(rel string, links []string) string { + for _, link := range links { + if !regexpLinkRel.MatchString(link) { + continue + } + + matches := regexpLinkRel.FindStringSubmatch(link) + if len(matches) != 3 { + continue + } + + if matches[2] != rel { + continue + } + + return matches[1] + } + + return "" +} + +func toInt(s string) int { + i, _ := strconv.ParseInt(s, 10, 64) + return int(i) +} + +func add(vs ...int) int { + var sum int + for _, v := range vs { + sum += v + } + return sum +} diff --git a/x-pack/filebeat/input/httpjson/internal/v2/value_tpl_test.go b/x-pack/filebeat/input/httpjson/internal/v2/value_tpl_test.go new file mode 100644 index 000000000000..d60c4a817187 --- /dev/null +++ b/x-pack/filebeat/input/httpjson/internal/v2/value_tpl_test.go @@ -0,0 +1,243 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package v2 + +import ( + "net/http" + "testing" + "time" + + "github.com/stretchr/testify/assert" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/logp" +) + +func TestValueTpl(t *testing.T) { + cases := []struct { + name string + value string + paramCtx *transformContext + paramTr transformable + paramDefVal string + expected string + setup func() + teardown func() + }{ + { + name: "can render values from ctx", + value: "{{.last_response.body.param}}", + paramCtx: &transformContext{ + lastEvent: &common.MapStr{}, + lastResponse: newTestResponse(common.MapStr{"param": 25}, nil, ""), + }, + paramTr: transformable{}, + paramDefVal: "", + expected: "25", + }, + { + name: "can render default value if execute fails", + value: "{{.last_response.body.does_not_exist}}", + paramCtx: &transformContext{ + lastEvent: &common.MapStr{}, + }, + paramTr: transformable{}, + paramDefVal: "25", + expected: "25", + }, + { + name: "can render default value if template is empty", + value: "", + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + paramDefVal: "25", + expected: "25", + }, + { + name: "can render default value if execute panics", + value: "{{.last_response.panic}}", + paramDefVal: "25", + expected: "25", + }, + { + name: "func parseDuration", + value: `{{ parseDuration "-1h" }}`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + expected: "-1h0m0s", + }, + { + name: "func now", + setup: func() { timeNow = func() time.Time { return time.Unix(1604582732, 0).UTC() } }, + teardown: func() { timeNow = time.Now }, + value: `{{ now }}`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + expected: "2020-11-05 13:25:32 +0000 UTC", + }, + { + name: "func now with duration", + setup: func() { timeNow = func() time.Time { return time.Unix(1604582732, 0).UTC() } }, + teardown: func() { timeNow = time.Now }, + value: `{{ now (parseDuration "-1h") }}`, + paramCtx: emptyTransformContext(), + paramTr: 
transformable{}, + expected: "2020-11-05 12:25:32 +0000 UTC", + }, + { + name: "func parseDate", + value: `{{ parseDate "2020-11-05T12:25:32.1234567Z" "RFC3339Nano" }}`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + expected: "2020-11-05 12:25:32.1234567 +0000 UTC", + }, + { + name: "func parseDate defaults to RFC3339", + value: `{{ parseDate "2020-11-05T12:25:32Z" }}`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + expected: "2020-11-05 12:25:32 +0000 UTC", + }, + { + name: "func parseDate with custom layout", + value: `{{ (parseDate "Thu Nov 5 12:25:32 +0000 2020" "Mon Jan _2 15:04:05 -0700 2006") }}`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + expected: "2020-11-05 12:25:32 +0000 UTC", + }, + { + name: "func formatDate", + setup: func() { timeNow = func() time.Time { return time.Unix(1604582732, 0).UTC() } }, + teardown: func() { timeNow = time.Now }, + value: `{{ formatDate (now) "UnixDate" "America/New_York" }}`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + expected: "Thu Nov 5 08:25:32 EST 2020", + }, + { + name: "func formatDate defaults to UTC", + setup: func() { timeNow = func() time.Time { return time.Unix(1604582732, 0).UTC() } }, + teardown: func() { timeNow = time.Now }, + value: `{{ formatDate (now) "UnixDate" }}`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + expected: "Thu Nov 5 13:25:32 UTC 2020", + }, + { + name: "func formatDate falls back to UTC", + setup: func() { timeNow = func() time.Time { return time.Unix(1604582732, 0).UTC() } }, + teardown: func() { timeNow = time.Now }, + value: `{{ formatDate (now) "UnixDate" "wrong/tz"}}`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + expected: "Thu Nov 5 13:25:32 UTC 2020", + }, + { + name: "func parseTimestamp", + value: `{{ (parseTimestamp 1604582732) }}`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + expected: "2020-11-05 13:25:32 +0000 UTC", + }, + { 
+ name: "func parseTimestampMilli", + value: `{{ (parseTimestampMilli 1604582732000) }}`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + expected: "2020-11-05 13:25:32 +0000 UTC", + }, + { + name: "func parseTimestampNano", + value: `{{ (parseTimestampNano 1604582732000000000) }}`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + expected: "2020-11-05 13:25:32 +0000 UTC", + }, + { + name: "func getRFC5988Link", + value: `{{ getRFC5988Link "previous" .last_response.header.Link }}`, + paramCtx: &transformContext{ + lastEvent: &common.MapStr{}, + lastResponse: newTestResponse( + nil, + http.Header{"Link": []string{ + `; title="Page 3"; rel="next"`, + `; title="Page 1"; rel="previous"`, + }}, + "", + ), + }, + paramTr: transformable{}, + expected: "https://example.com/api/v1/users?before=00ubfjQEMYBLRUWIEDKK", + }, + { + name: "func getRFC5988Link does not match", + value: `{{ getRFC5988Link "previous" .last_response.header.Link }}`, + paramCtx: &transformContext{ + lastResponse: newTestResponse( + nil, + http.Header{"Link": []string{ + ``, + }}, + "", + ), + }, + paramTr: transformable{}, + paramDefVal: "https://example.com/default", + expected: "https://example.com/default", + }, + { + name: "func getRFC5988Link empty header", + value: `{{ getRFC5988Link "previous" .last_response.header.Empty }}`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + paramDefVal: "https://example.com/default", + expected: "https://example.com/default", + }, + { + name: "can execute functions pipeline", + setup: func() { timeNow = func() time.Time { return time.Unix(1604582732, 0).UTC() } }, + teardown: func() { timeNow = time.Now }, + value: `{{ (parseDuration "-1h") | now | formatDate }}`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + expected: "2020-11-05T12:25:32Z", + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + if tc.setup != nil { + tc.setup() + } + if tc.teardown 
!= nil { + t.Cleanup(tc.teardown) + } + tpl := &valueTpl{} + assert.NoError(t, tpl.Unpack(tc.value)) + defTpl := &valueTpl{} + assert.NoError(t, defTpl.Unpack(tc.paramDefVal)) + got := tpl.Execute(tc.paramCtx, tc.paramTr, defTpl, logp.NewLogger("")) + assert.Equal(t, tc.expected, got) + }) + } +} + +func newTestResponse(body common.MapStr, header http.Header, url string) *response { + resp := &response{ + header: http.Header{}, + } + if len(body) > 0 { + resp.body = body + } + if len(header) > 0 { + resp.header = header + } + if url != "" { + resp.url = newURL(url) + } + return resp +}