-
Notifications
You must be signed in to change notification settings - Fork 3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(elasticsearch) Analytics indices creation on AWS ES #5502
Changes from all commits
cdee86f
2c61723
362c47c
2e1b5e6
51dac7b
2cf8276
b7433f9
750b2d0
92c2673
9236d2e
31ac516
944984c
e820132
aa6cf0d
71653b0
43fcec5
0873672
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,98 +6,137 @@ set -e | |
: ${USE_AWS_ELASTICSEARCH:=false} | ||
: ${ELASTICSEARCH_INSECURE:=false} | ||
|
||
# protocol: http or https? | ||
if [[ $ELASTICSEARCH_USE_SSL == true ]]; then | ||
ELASTICSEARCH_PROTOCOL=https | ||
else | ||
ELASTICSEARCH_PROTOCOL=http | ||
fi | ||
echo -e "Going to use protocol: $ELASTICSEARCH_PROTOCOL" | ||
echo -e "going to use protocol: $ELASTICSEARCH_PROTOCOL" | ||
|
||
if [[ ! -z $ELASTICSEARCH_USERNAME ]] && [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then | ||
AUTH_TOKEN=$(echo -ne "$ELASTICSEARCH_USERNAME:$ELASTICSEARCH_PASSWORD" | base64 --wrap 0) | ||
ELASTICSEARCH_AUTH_HEADER="Authorization:Basic $AUTH_TOKEN" | ||
fi | ||
# Elasticsearch URL to be suffixed with a resource address | ||
ELASTICSEARCH_URL="$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT" | ||
|
||
# Add default header if needed | ||
# set auth header if none is given | ||
if [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then | ||
echo -e "Going to use default elastic headers" | ||
ELASTICSEARCH_AUTH_HEADER="Accept: */*" | ||
if [[ ! -z $ELASTICSEARCH_USERNAME ]]; then | ||
# no auth header given, but username is defined -> use it to create the auth header | ||
AUTH_TOKEN=$(echo -ne "$ELASTICSEARCH_USERNAME:$ELASTICSEARCH_PASSWORD" | base64 --wrap 0) | ||
ELASTICSEARCH_AUTH_HEADER="Authorization:Basic $AUTH_TOKEN" | ||
echo -e "going to use elastic headers based on username and password" | ||
else | ||
# no auth header or username given -> use default auth header | ||
ELASTICSEARCH_AUTH_HEADER="Accept: */*" | ||
echo -e "going to use default elastic headers" | ||
fi | ||
fi | ||
|
||
# will be using this for all curl communication with Elasticsearch: | ||
CURL_ARGS=( | ||
--silent | ||
--header "$ELASTICSEARCH_AUTH_HEADER" | ||
) | ||
# ... also optionally use --insecure | ||
if [[ $ELASTICSEARCH_INSECURE == true ]]; then | ||
echo -e "Going to use default elastic insecure mode" | ||
ELASTICSEARCH_INSECURE="-k " | ||
else | ||
unset ELASTICSEARCH_INSECURE | ||
CURL_ARGS+=(--insecure) | ||
fi | ||
|
||
function create_datahub_usage_event_datastream() { | ||
if [[ -z "$INDEX_PREFIX" ]]; then | ||
PREFIX='' | ||
else | ||
PREFIX="${INDEX_PREFIX}_" | ||
fi | ||
echo -e "Create datahub_usage_event if needed against Elasticsearch at $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT" | ||
echo -e "Going to use index prefix:$PREFIX:" | ||
POLICY_RESPONSE_CODE=$(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_ilm/policy/${PREFIX}datahub_usage_event_policy") | ||
echo -e "Policy GET response code is $POLICY_RESPONSE_CODE" | ||
POLICY_NAME="${PREFIX}datahub_usage_event_policy" | ||
if [ $POLICY_RESPONSE_CODE -eq 404 ]; then | ||
echo -e "\ncreating $POLICY_NAME" | ||
sed -e "s/PREFIX/${PREFIX}/g" /index/usage-event/policy.json | tee -a /tmp/policy.json | ||
curl -s -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_ilm/policy/$POLICY_NAME" --header "Content-Type: application/json" --data "@/tmp/policy.json" | ||
elif [ $POLICY_RESPONSE_CODE -eq 200 ]; then | ||
echo -e "\n${POLICY_NAME} exists" | ||
elif [ $POLICY_RESPONSE_CODE -eq 403 ]; then | ||
echo -e "Forbidden so exiting" | ||
exit 1 | ||
else | ||
echo -e "Got response code $POLICY_RESPONSE_CODE while creating policy so exiting." | ||
exit 1 | ||
fi | ||
|
||
TEMPLATE_RESPONSE_CODE=$(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_index_template/${PREFIX}datahub_usage_event_index_template") | ||
echo -e "Template GET response code is $TEMPLATE_RESPONSE_CODE" | ||
TEMPLATE_NAME="${PREFIX}datahub_usage_event_index_template" | ||
if [ $TEMPLATE_RESPONSE_CODE -eq 404 ]; then | ||
echo -e "\ncreating $TEMPLATE_NAME" | ||
sed -e "s/PREFIX/${PREFIX}/g" /index/usage-event/index_template.json | tee -a /tmp/index_template.json | ||
curl -s -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_index_template/$TEMPLATE_NAME" --header "Content-Type: application/json" --data "@/tmp/index_template.json" | ||
elif [ $TEMPLATE_RESPONSE_CODE -eq 200 ]; then | ||
echo -e "\n$TEMPLATE_NAME exists" | ||
elif [ $TEMPLATE_RESPONSE_CODE -eq 403 ]; then | ||
echo -e "Forbidden so exiting" | ||
# index prefix used throughout the script | ||
if [[ -z "$INDEX_PREFIX" ]]; then | ||
PREFIX='' | ||
echo -e "not using any prefix" | ||
else | ||
PREFIX="${INDEX_PREFIX}_" | ||
echo -e "going to use prefix: '$PREFIX'" | ||
fi | ||
|
||
# path where index definitions are stored | ||
INDEX_DEFINITIONS_ROOT=/index/usage-event | ||
|
||
|
||
# check Elasticsearch for given index/resource (first argument) | ||
# if it doesn't exist (http code 404), use the given file (second argument) to create it | ||
function create_if_not_exists { | ||
RESOURCE_ADDRESS="$1" | ||
RESOURCE_DEFINITION_NAME="$2" | ||
|
||
# query ES to see if the resource already exists | ||
RESOURCE_STATUS=$(curl "${CURL_ARGS[@]}" -o /dev/null -w "%{http_code}\n" "$ELASTICSEARCH_URL/$RESOURCE_ADDRESS") | ||
echo -e "\n>>> GET $RESOURCE_ADDRESS response code is $RESOURCE_STATUS" | ||
|
||
if [ $RESOURCE_STATUS -eq 200 ]; then | ||
# resource already exists -> nothing to do | ||
echo -e ">>> $RESOURCE_ADDRESS already exists ✓" | ||
|
||
elif [ $RESOURCE_STATUS -eq 404 ]; then | ||
# resource doesn't exist -> need to create it | ||
echo -e ">>> creating $RESOURCE_ADDRESS because it doesn't exist ..." | ||
# use the file at given path as definition, but first replace all occurrences of `PREFIX` | ||
# placeholder within the file with the actual prefix value | ||
TMP_SOURCE_PATH="/tmp/$RESOURCE_DEFINITION_NAME" | ||
sed -e "s/PREFIX/$PREFIX/g" "$INDEX_DEFINITIONS_ROOT/$RESOURCE_DEFINITION_NAME" | tee -a "$TMP_SOURCE_PATH" | ||
curl "${CURL_ARGS[@]}" -XPUT "$ELASTICSEARCH_URL/$RESOURCE_ADDRESS" -H 'Content-Type: application/json' --data "@$TMP_SOURCE_PATH" | ||
|
||
elif [ $RESOURCE_STATUS -eq 403 ]; then | ||
# probably authorization fail | ||
echo -e ">>> forbidden access to $RESOURCE_ADDRESS ! -> exiting" | ||
exit 1 | ||
|
||
else | ||
echo -e "Got response code $TEMPLATE_RESPONSE_CODE while creating template so exiting." | ||
# when `USE_AWS_ELASTICSEARCH` was forgotten to be set to `true` when running against AWS ES OSS, | ||
# this script will use wrong paths (e.g. `_ilm/policy/` instead of AWS-compatible `_opendistro/_ism/policies/`) | ||
# and the ES endpoint will return `401 Unauthorized` or `405 Method Not Allowed` | ||
# let's use this as chance to point that wrong config might be used! | ||
if [ $RESOURCE_STATUS -eq 401 ] || [ $RESOURCE_STATUS -eq 405 ]; then | ||
if [[ $USE_AWS_ELASTICSEARCH == false ]] && [[ $ELASTICSEARCH_URL == *"amazonaws"* ]]; then | ||
echo "... looks like AWS OpenSearch is used; please set USE_AWS_ELASTICSEARCH env value to true" | ||
fi | ||
fi | ||
|
||
echo -e ">>> failed to GET $RESOURCE_ADDRESS ! -> exiting" | ||
exit 1 | ||
fi | ||
} | ||
|
||
# create indices for ES (non-AWS) | ||
function create_datahub_usage_event_datastream() { | ||
# non-AWS env requires creation of two resources for Datahub usage events: | ||
# 1. ILM policy | ||
create_if_not_exists "_ilm/policy/${PREFIX}datahub_usage_event_policy" policy.json | ||
# 2. index template | ||
create_if_not_exists "_index_template/${PREFIX}datahub_usage_event_index_template" index_template.json | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we not need a line like this here:
to ensure that the index is actually created for non aws cases? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because I do think there's a separate issue actively open where on fresh quickstarts without any usage events, the analytics index will be missing. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure how this works outside of AWS. The behavior in non-AWS environment should be the same as before refactoring. Can you link the issue mentioned? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's this PR in question: #5974 |
||
} | ||
|
||
# create indices for ES OSS (AWS) | ||
function create_datahub_usage_event_aws_elasticsearch() { | ||
if [[ -z "$INDEX_PREFIX" ]]; then | ||
PREFIX='' | ||
else | ||
PREFIX="${INDEX_PREFIX}_" | ||
fi | ||
# AWS env requires creation of three resources for Datahub usage events: | ||
# 1. ISM policy | ||
create_if_not_exists "_opendistro/_ism/policies/${PREFIX}datahub_usage_event_policy" aws_es_ism_policy.json | ||
|
||
if [ $(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_opendistro/_ism/policies/${PREFIX}datahub_usage_event_policy") -eq 404 ] | ||
then | ||
echo -e "\ncreating datahub_usage_event_policy" | ||
sed -e "s/PREFIX/${PREFIX}/g" /index/usage-event/aws_es_ism_policy.json | tee -a /tmp/aws_es_ism_policy.json | ||
curl -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_opendistro/_ism/policies/${PREFIX}datahub_usage_event_policy" -H 'Content-Type: application/json' --data @/tmp/aws_es_ism_policy.json | ||
else | ||
echo -e "\ndatahub_usage_event_policy exists" | ||
fi | ||
if [ $(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_template/${PREFIX}datahub_usage_event_index_template") -eq 404 ] | ||
then | ||
echo -e "\ncreating datahub_usage_event_index_template" | ||
sed -e "s/PREFIX/${PREFIX}/g" /index/usage-event/aws_es_index_template.json | tee -a /tmp/aws_es_index_template.json | ||
curl -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_template/${PREFIX}datahub_usage_event_index_template" -H 'Content-Type: application/json' --data @/tmp/aws_es_index_template.json | ||
curl -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/${PREFIX}datahub_usage_event-000001" -H 'Content-Type: application/json' --data "{\"aliases\":{\"${PREFIX}datahub_usage_event\":{\"is_write_index\":true}}}" | ||
else | ||
echo -e "\ndatahub_usage_event_index_template exists" | ||
# 2. index template | ||
create_if_not_exists "_template/${PREFIX}datahub_usage_event_index_template" aws_es_index_template.json | ||
|
||
# 3. event index datahub_usage_event-000001 | ||
# (note that AWS *rollover* indices need to use `^.*-\d+$` naming pattern) | ||
# -> https://aws.amazon.com/premiumsupport/knowledge-center/opensearch-failed-rollover-index/ | ||
INDEX_SUFFIX="000001" | ||
# ... but first check whether `datahub_usage_event` wasn't already autocreated by GMS before `datahub_usage_event-000001` | ||
# (as is common case when this script was initially run without properly setting `USE_AWS_ELASTICSEARCH` to `true`) | ||
# -> https://github.com/datahub-project/datahub/issues/5376 | ||
USAGE_EVENT_STATUS=$(curl "${CURL_ARGS[@]}" -o /dev/null -w "%{http_code}\n" "$ELASTICSEARCH_URL/${PREFIX}datahub_usage_event") | ||
if [ $USAGE_EVENT_STATUS -eq 200 ]; then | ||
USAGE_EVENT_DEFINITION=$(curl "${CURL_ARGS[@]}" "$ELASTICSEARCH_URL/${PREFIX}datahub_usage_event") | ||
# the definition is expected to contain "datahub_usage_event-000001" string | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is that? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (Consider adding to comment here) |
||
if [[ $USAGE_EVENT_DEFINITION != *"datahub_usage_event-$INDEX_SUFFIX"* ]]; then | ||
# ... if it doesn't, we need to drop it | ||
echo -e "\n>>> deleting invalid datahub_usage_event ..." | ||
curl "${CURL_ARGS[@]}" -XDELETE "$ELASTICSEARCH_URL/${PREFIX}datahub_usage_event" | ||
# ... and then recreate it below | ||
fi | ||
fi | ||
|
||
# ... now we are safe to create the index | ||
create_if_not_exists "${PREFIX}datahub_usage_event-$INDEX_SUFFIX" aws_es_index.json | ||
} | ||
|
||
if [[ $DATAHUB_ANALYTICS_ENABLED == true ]]; then | ||
|
@@ -119,4 +158,4 @@ else | |
elif [ $DATAHUB_USAGE_EVENT_INDEX_RESPONSE_CODE -eq 403 ]; then | ||
echo -e "Forbidden so exiting" | ||
fi | ||
fi | ||
fi |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"aliases": { | ||
"PREFIXdatahub_usage_event": { | ||
"is_write_index": true | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
thank you!