Replace Elasticsearch with Opensearch #368

Closed · wants to merge 1 commit
2 changes: 1 addition & 1 deletion provisioning/resources/configs/Caddyfile
@@ -39,7 +39,7 @@
route /kibana* {
uri strip_prefix /kibana
header {
- Content-Security-Policy "default-src 'self'; frame-ancestors 'none'; style-src 'self' 'unsafe-inline'; script-src 'self' 'unsafe-eval' 'unsafe-inline'; img-src data: 'self'"
+ Content-Security-Policy "default-src 'self'; frame-ancestors 'none'; style-src 'self' 'unsafe-inline'; script-src 'self' 'unsafe-eval'; img-src data: 'self'"
X-Frame-Options "DENY"
}
reverse_proxy localhost:5601 {
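The only functional change here is dropping 'unsafe-inline' from the script-src directive: 'unsafe-eval' is kept and style-src still allows inline styles, on the assumption that OpenSearch Dashboards, unlike Kibana 6.x, serves no inline scripts. A minimal sketch of a header smoke test, assuming the proxy is reachable at http://localhost/kibana/ (the URL is an assumption, not part of this PR):

# Sketch: verify the tightened CSP that Caddy serves for the /kibana route.
# The localhost URL is an assumption; adjust to wherever the stack runs.
import urllib.request

req = urllib.request.Request("http://localhost/kibana/", method="HEAD")
with urllib.request.urlopen(req) as resp:
    csp = resp.headers.get("Content-Security-Policy", "")

assert csp, "no Content-Security-Policy header served"
# Isolate the script-src directive and confirm inline scripts are gone.
script_src = next((d for d in csp.split(";") if "script-src" in d), "")
assert "'unsafe-inline'" not in script_src, "script-src still allows inline scripts"
print("script-src:", script_src.strip())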
22 changes: 13 additions & 9 deletions provisioning/resources/configs/compositions/docker-compose-aws.yml
@@ -2,17 +2,19 @@ version: "3"

services:
elasticsearch:
- image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.8.23
+ image: opensearchproject/opensearch:2.4.0
container_name: elasticsearch
restart: always
environment:
# Swapping needs to be disabled for performance and node stability
- "bootstrap.memory_lock=true"
- - ES_JAVA_OPTS=-Xms${ES_JVM_SIZE} -Xmx${ES_JVM_SIZE}
+ - OPENSEARCH_JAVA_OPTS=-Xms${ES_JVM_SIZE} -Xmx${ES_JVM_SIZE}
+ - "DISABLE_INSTALL_DEMO_CONFIG=true"
+ - "DISABLE_SECURITY_PLUGIN=true"
volumes:
- - /home/ubuntu/snowplow/elasticsearch/data:/usr/share/elasticsearch/data
- - /home/ubuntu/snowplow/elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml
- - /home/ubuntu/snowplow/elasticsearch/config/log4j2.properties:/usr/share/elasticsearch/config/log4j2.properties
+ - /home/ubuntu/snowplow/elasticsearch/data:/usr/share/opensearch/data
+ - /home/ubuntu/snowplow/elasticsearch/config/elasticsearch.yml:/usr/share/opensearch/config/opensearch.yml
+ - /home/ubuntu/snowplow/elasticsearch/config/log4j2.properties:/usr/share/opensearch/config/log4j2.properties
ulimits:
memlock:
soft: -1
@@ -30,11 +32,13 @@ services:
- "9300:9300"

kibana:
- image: docker.elastic.co/kibana/kibana-oss:6.8.23
+ image: opensearchproject/opensearch-dashboards:2.4.0
container_name: kibana
restart: always
+ environment:
+ - "DISABLE_SECURITY_DASHBOARDS_PLUGIN=true"
volumes:
- - /home/ubuntu/snowplow/elasticsearch/config/kibana.yml:/usr/share/kibana/config/kibana.yml
+ - /home/ubuntu/snowplow/elasticsearch/config/kibana.yml:/usr/share/opensearch-dashboards/config/opensearch_dashboards.yml
ports:
- "5601:5601"
depends_on:
@@ -46,7 +50,7 @@
awslogs-stream: "kibana"

elasticsearch-loader-good:
- image: snowplow/elasticsearch-loader:1.0.7
+ image: snowplow/elasticsearch-loader:2.0.8
Contributor: Did you try 2.0.8-distroless?

Contributor (author): Do we have a distroless image for the Elasticsearch loader? I couldn't see one here.

Contributor: Sorry, my mistake, you are right. Elasticsearch loader is the only app for which we don't have a distroless image (yet).
container_name: elasticsearch-loader-good
command: [ "--config", "/snowplow/config/snowplow-es-loader-good.hocon" ]
restart: always
@@ -63,7 +67,7 @@ services:
- "JAVA_OPTS=-Xmx${SP_JVM_SIZE} -Dlog4j2.formatMsgNoLookups=true"

elasticsearch-loader-bad:
- image: snowplow/elasticsearch-loader:1.0.7
+ image: snowplow/elasticsearch-loader:2.0.8
container_name: elasticsearch-loader-bad
command: [ "--config", "/snowplow/config/snowplow-es-loader-bad.hocon" ]
restart: always
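With the demo config and the security plugin disabled, the node answers plain HTTP on the published port 9200, so the image swap can be smoke-tested without credentials. A minimal readiness probe, sketched under the assumption that port 9200 is reachable on localhost and that the third-party requests package is installed (neither is part of this compose file):

# Sketch: poll the OpenSearch root endpoint until the node reports itself.
import time
import requests

def wait_for_opensearch(url="http://localhost:9200", timeout=120):
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            # The root endpoint returns cluster info, including the version
            # block with the distribution name and number.
            return requests.get(url, timeout=5).json()["version"]
        except (requests.ConnectionError, requests.Timeout, ValueError, KeyError):
            time.sleep(3)
    raise RuntimeError("OpenSearch did not become ready in time")

print(wait_for_opensearch())  # e.g. {'distribution': 'opensearch', 'number': '2.4.0', ...}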
22 changes: 13 additions & 9 deletions provisioning/resources/configs/compositions/docker-compose-gcp.yml
@@ -2,17 +2,19 @@ version: "3"

services:
elasticsearch:
- image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.8.23
+ image: opensearchproject/opensearch:2.4.0
container_name: elasticsearch
restart: always
environment:
# Swapping needs to be disabled for performance and node stability
- "bootstrap.memory_lock=true"
- - ES_JAVA_OPTS=-Xms${ES_JVM_SIZE} -Xmx${ES_JVM_SIZE}
+ - OPENSEARCH_JAVA_OPTS=-Xms${ES_JVM_SIZE} -Xmx${ES_JVM_SIZE}
+ - "DISABLE_INSTALL_DEMO_CONFIG=true"
+ - "DISABLE_SECURITY_PLUGIN=true"
volumes:
- - /home/ubuntu/snowplow/elasticsearch/data:/usr/share/elasticsearch/data
- - /home/ubuntu/snowplow/elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml
- - /home/ubuntu/snowplow/elasticsearch/config/log4j2.properties:/usr/share/elasticsearch/config/log4j2.properties
+ - /home/ubuntu/snowplow/elasticsearch/data:/usr/share/opensearch/data
+ - /home/ubuntu/snowplow/elasticsearch/config/elasticsearch.yml:/usr/share/opensearch/config/opensearch.yml
+ - /home/ubuntu/snowplow/elasticsearch/config/log4j2.properties:/usr/share/opensearch/config/log4j2.properties
ulimits:
memlock:
soft: -1
@@ -27,11 +29,13 @@ services:
- "9300:9300"

kibana:
- image: docker.elastic.co/kibana/kibana-oss:6.8.23
+ image: opensearchproject/opensearch-dashboards:2.4.0
container_name: kibana
restart: always
+ environment:
+ - "DISABLE_SECURITY_DASHBOARDS_PLUGIN=true"
volumes:
- - /home/ubuntu/snowplow/elasticsearch/config/kibana.yml:/usr/share/kibana/config/kibana.yml
+ - /home/ubuntu/snowplow/elasticsearch/config/kibana.yml:/usr/share/opensearch-dashboards/config/opensearch_dashboards.yml
ports:
- "5601:5601"
depends_on:
@@ -40,7 +44,7 @@
driver: gcplogs

elasticsearch-loader-good:
- image: snowplow/elasticsearch-loader:1.0.7
+ image: snowplow/elasticsearch-loader:2.0.8
container_name: elasticsearch-loader-good
command: [ "--config", "/snowplow/config/snowplow-es-loader-good.hocon" ]
restart: always
@@ -54,7 +58,7 @@
- "JAVA_OPTS=-Xmx${SP_JVM_SIZE} -Dlog4j2.formatMsgNoLookups=true"

elasticsearch-loader-bad:
- image: snowplow/elasticsearch-loader:1.0.7
+ image: snowplow/elasticsearch-loader:2.0.8
container_name: elasticsearch-loader-bad
command: [ "--config", "/snowplow/config/snowplow-es-loader-bad.hocon" ]
restart: always
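The GCP composition mirrors the AWS one apart from the logging driver. Since the security dashboards plugin is disabled, OpenSearch Dashboards should answer unauthenticated on the published port 5601; a sketch of a status probe, assuming the /api/status endpoint inherited from the Kibana lineage and the localhost port mapping above:

# Sketch: ask OpenSearch Dashboards for its overall status.
# URL and response layout are assumptions based on the Kibana-derived API.
import json
import urllib.request

with urllib.request.urlopen("http://localhost:5601/api/status") as resp:
    status = json.load(resp)

# Degrade gracefully if the payload shape differs.
print(status.get("status", {}).get("overall", {}).get("state", "unknown"))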
213 changes: 96 additions & 117 deletions provisioning/resources/configs/snowplow-es-loader-bad.hocon
@@ -11,125 +11,104 @@
# implied. See the Apache License Version 2.0 for the specific language
# governing permissions and limitations there under.

- # This file (config.hocon.sample) contains a template with
- # configuration options for the Elasticsearch Loader.
-
- # Sources currently supported are:
- # "kinesis" for reading records from a Kinesis stream
- # "stdin" for reading unencoded tab-separated events from stdin
- # If set to "stdin", JSON documents will not be sent to Elasticsearch
- # but will be written to stdout.
- # "nsq" for reading unencoded tab-separated events from NSQ
- source = "nsq"
-
- # Where to write good and bad records
- sink {
- # Sinks currently supported are:
- # "elasticsearch" for writing good records to Elasticsearch
- # "stdout" for writing good records to stdout
- good = "elasticsearch"
-
- # Sinks currently supported are:
- # "kinesis" for writing bad records to Kinesis
- # "stderr" for writing bad records to stderr
- # "nsq" for writing bad records to NSQ
- # "none" for ignoring bad records
- bad = "nsq"
- }
-
- # "good" for a stream of successfully enriched events
- # "bad" for a stream of bad events
- # "plain-json" for writing plain json
- enabled = "bad"
-
- # The following are used to authenticate for the Amazon Kinesis sink.
- #
- # If both are set to "default", the default provider chain is used
- # (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html)
- #
- # If both are set to "iam", use AWS IAM Roles to provision credentials.
- #
- # If both are set to "env", use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
- aws {
- accessKey = ""
- secretKey = ""
- }
-
- queue {
- # What queue to use, can be "kinesis" or "nsq"
- enabled="nsq"
-
- # Config for NSQ
- # Channel name for NSQ source
- # If more than one application reading from the same NSQ topic at the same time,
- # all of them must have unique channel name for getting all the data from the same topic
- channelName = "ESLoaderChannelBad"
-
- # Host name for nsqd
- nsqdHost = "nsqd"
- # HTTP port for nsqd
- nsqdPort = 4150
-
- # Host name for nsqlookupd
- nsqlookupdHost = "nsqlookupd"
- # HTTP port for nsqd
- nsqlookupdPort = 4161
- }
-
- # Common configuration section for all stream sources
- streams {
- inStreamName = "BadEnrichedEvents"
-
- # Stream for enriched events which are rejected by Elasticsearch
- outStreamName = "BadElasticsearchEvents"
-
- # Events are accumulated in a buffer before being sent to Elasticsearch.
- # The buffer is emptied whenever:
- # - the combined size of the stored records exceeds byteLimit or
- # - the number of stored records exceeds recordLimit or
- # - the time in milliseconds since it was last emptied exceeds timeLimit
- buffer {
- byteLimit = 5242880 # Not supported by NSQ, will be ignored
- recordLimit = 1
- timeLimit = 60000 # Not supported by NSQ, will be ignored
- }
- }
-
- elasticsearch {
-
- # Events are indexed using an Elasticsearch Client
- # - endpoint: the cluster endpoint
- # - port: the port the cluster can be accessed on
- # - for http this is usually 9200
- # - for transport this is usually 9300
- # - username (optional, remove if not active): http basic auth username
- # - password (optional, remove if not active): http basic auth password
- # - shardDateFormat (optional, remove if not needed): formatting used for sharding good stream, i.e. _yyyy-MM-dd
- # - shardDateField (optional, if not specified derived_tstamp is used): timestamp field for sharding good stream
- # - max-timeout: the maximum attempt time before a client restart
- # - ssl: if using the http client, whether to use ssl or not
- client {
- endpoint = "elasticsearch"
- port = "9200"
- maxTimeout = "10000"
- maxRetries = 3
- ssl = false
+ {
+ "input": {
+ # Sources currently supported are:
+ # "kinesis" for reading records from a Kinesis stream
+ # "stdin" for reading unencoded tab-separated events from stdin
+ # If set to "stdin", JSON documents will not be sent to Elasticsearch
+ # but will be written to stdout.
+ # "nsq" for reading unencoded tab-separated events from NSQ
+ "type": "nsq"
+
+ # Stream name for incoming data
+ "streamName": "BadEnrichedEvents"
+
+ # Channel name for NSQ source
+ # If more than one application is reading from the same NSQ topic at the same time,
+ # each must use a unique channel name to receive all the data from the topic
"channelName": "ESLoaderChannelBad"

# Host name for nsqlookupd
"nsqlookupdHost": "nsqlookupd"

+ # HTTP port for nsqlookupd
+ "nsqlookupdPort": 4161
+
+ # Events are accumulated in a buffer before being sent to Elasticsearch.
+ # The buffer is emptied whenever the number of stored records exceeds recordLimit
+ # This value is optional.
+ "buffer": {
+ "recordLimit": 1
+ }
+ }
+
- # When using the AWS ES service
- # - signing: if using the http client and the AWS ES service you can sign your requests
- # http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html
- # - region where the AWS ES service is located
- aws {
- signing = false
- region = ""
+ "output": {
+ "good": {
+ # Good sinks currently supported are:
+ # "elasticsearch" for writing good records to Elasticsearch
+ # "stdout" for writing good records to stdout
+ # Default value "elasticsearch"
+ "type": "elasticsearch"
+
+ # Events are indexed using an Elasticsearch Client
+ # - endpoint: the cluster endpoint
+ # - port (optional, default value 9200): the port the cluster can be accessed on
+ # - for http this is usually 9200
+ # - for transport this is usually 9300
+ # - username (optional, remove if not active): http basic auth username
+ # - password (optional, remove if not active): http basic auth password
+ # - shardDateFormat (optional, remove if not needed): formatting used for sharding good stream, i.e. _yyyy-MM-dd
+ # - shardDateField (optional, if not specified derived_tstamp is used): timestamp field for sharding good stream
+ # - indexTimeout (optional, default value 60000): the maximum time to wait in milliseconds for a single http transaction when indexing events
+ # - maxTimeout (optional, default value 10000): the maximum time to wait in milliseconds between retries after load failures
+ # - maxRetries (optional, default value 6): the maximum number of request attempts before giving up
+ # - ssl (optional, default value false): if using the http client, whether to use ssl or not
+ "client": {
+ "endpoint": "elasticsearch"
+ "port": 9200
+ "maxTimeout": "10000"
+ "maxRetries": 3
+ "ssl": false
+ }
+
+ # When using the AWS ES service
+ # - signing: if using the http client and the AWS ES service you can sign your requests
+ # http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html
+ # - region where the AWS ES service is located
+ # These values are optional.
+ "aws": {
+ "signing": false
+ "region": "eu-central-1"
+ }
+
+ "cluster": {
+ # The Elasticsearch index name
+ # Default value "good"
+ "index": "bad"
+ }
+ }
+ "bad": {
+ # Bad sinks currently supported are:
+ # "kinesis" for writing bad records to Kinesis
+ # "stderr" for writing bad records to stderr
+ # "nsq" for writing bad records to NSQ
+ # "none" for ignoring bad records
+ "type": "nsq"
+
+ # Stream name for events which are rejected by Elasticsearch
+ "streamName": "BadElasticsearchEvents"
+
+ # Host name for nsqd
+ "nsqdHost": "nsqd"
+ # TCP port for nsqd
+ "nsqdPort": 4150
+ }
+ }
+
- # index: the Elasticsearch index name
- # type: the Elasticsearch index type
- cluster {
- name = "elasticsearch"
- index = "bad"
- documentType = "bad"
- }
+ # "ENRICHED_EVENTS" for a stream of successfully enriched events
+ # "BAD_ROWS" for a stream of bad events
+ # "JSON" for writing plain json
+ "purpose": "BAD_ROWS"
+ }
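The rewrite moves the loader from the flat 1.x layout (source, sink, queue, streams, elasticsearch, cluster) to the nested 2.x layout with input, output and purpose at the top level. A quick structural sanity check, sketched with the third-party pyhocon package (the package choice and file path are assumptions, not part of the PR):

# Sketch: parse the rewritten config and assert the new nested layout.
from pyhocon import ConfigFactory

conf = ConfigFactory.parse_file("snowplow-es-loader-bad.hocon")

assert conf.get_string("input.type") == "nsq"
assert conf.get_string("output.good.type") == "elasticsearch"
assert conf.get_string("output.good.cluster.index") == "bad"
assert conf.get_string("purpose") == "BAD_ROWS"
print("config parses and matches the expected 2.x layout")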

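For reference, the buffering described in the config comments reduces to a record-count flush; with recordLimit = 1, as above, every record is shipped individually. A toy sketch of that behaviour (the class and names are illustrative, not the loader's internals):

# Sketch: flush a batch as soon as record_limit records have accumulated.
from typing import Callable, List

class RecordBuffer:
    def __init__(self, record_limit: int, flush: Callable[[List[str]], None]):
        self.record_limit = record_limit
        self.flush = flush
        self._records: List[str] = []

    def add(self, record: str) -> None:
        self._records.append(record)
        if len(self._records) >= self.record_limit:
            self.flush(self._records)
            self._records = []

buf = RecordBuffer(record_limit=1, flush=lambda batch: print("flushing", batch))
buf.add('{"event": "example"}')  # flushes immediately when record_limit is 1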