-
Notifications
You must be signed in to change notification settings - Fork 3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. Weβll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(elasticsearch) Analytics indices creation on AWS ES #5502
Changes from 6 commits
cdee86f
2c61723
362c47c
2e1b5e6
51dac7b
2cf8276
b7433f9
750b2d0
92c2673
9236d2e
31ac516
944984c
e820132
aa6cf0d
71653b0
43fcec5
0873672
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,71 +5,96 @@ set -e | |
: ${DATAHUB_ANALYTICS_ENABLED:=true} | ||
: ${USE_AWS_ELASTICSEARCH:=false} | ||
|
||
# protocol: http or https? | ||
if [[ $ELASTICSEARCH_USE_SSL == true ]]; then | ||
ELASTICSEARCH_PROTOCOL=https | ||
else | ||
ELASTICSEARCH_PROTOCOL=http | ||
fi | ||
|
||
if [[ ! -z $ELASTICSEARCH_USERNAME ]] && [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then | ||
AUTH_TOKEN=$(echo -ne "$ELASTICSEARCH_USERNAME:$ELASTICSEARCH_PASSWORD" | base64 --wrap 0) | ||
ELASTICSEARCH_AUTH_HEADER="Authorization:Basic $AUTH_TOKEN" | ||
fi | ||
# Elasticsearch URL to be suffixed with a resource address | ||
ELASTICSEARCH_URL="$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT" | ||
|
||
# Add default header if needed | ||
# set auth header if none is given | ||
if [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then | ||
ELASTICSEARCH_AUTH_HEADER="Accept: */*" | ||
fi | ||
|
||
function create_datahub_usage_event_datastream() { | ||
if [[ -z "$INDEX_PREFIX" ]]; then | ||
PREFIX='' | ||
if [[ ! -z $ELASTICSEARCH_USERNAME ]]; then | ||
# no auth header given, but username is defined -> use it to create the auth header | ||
AUTH_TOKEN=$(echo -ne "$ELASTICSEARCH_USERNAME:$ELASTICSEARCH_PASSWORD" | base64 --wrap 0) | ||
ELASTICSEARCH_AUTH_HEADER="Authorization:Basic $AUTH_TOKEN" | ||
else | ||
PREFIX="${INDEX_PREFIX}_" | ||
# no auth header or username given -> use default auth header | ||
ELASTICSEARCH_AUTH_HEADER="Accept: */*" | ||
fi | ||
fi | ||
|
||
# index prefix used throughout the script | ||
if [[ -z "$INDEX_PREFIX" ]]; then | ||
PREFIX='' | ||
else | ||
PREFIX="${INDEX_PREFIX}_" | ||
fi | ||
|
||
# path where index definitions are stored | ||
INDEX_DEFINITIONS_ROOT=/index/usage-event | ||
|
||
|
||
# check Elasticsearch for given index/resource (first argument) | ||
# if it doesn't exist (http code 404), use the given file (second argument) to create it | ||
function create_if_not_exists { | ||
RESOURCE_ADDRESS="$1" | ||
RESOURCE_DEFINITION_NAME="$2" | ||
|
||
# query ES to see if the resource already exists | ||
RESOURCE_STATUS=$(curl -o /dev/null -s -w "%{http_code}\n" --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_URL/$RESOURCE_ADDRESS") | ||
|
||
if [ $RESOURCE_STATUS -eq 200 ]; then | ||
# resource already exists -> nothing to do | ||
echo -e "\n>>> $RESOURCE_ADDRESS already exists β" | ||
|
||
elif [ $RESOURCE_STATUS -eq 404 ]; then | ||
# resource doesn't exist -> need to create it | ||
echo -e "\n>>> creating $RESOURCE_ADDRESS ..." | ||
# use the given path as definition, but first replace all occurences of PREFIX with the actual prefix | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these comments are super helper. thank you |
||
TMP_SOURCE_PATH="/tmp/$RESOURCE_DEFINITION_NAME" | ||
sed -e "s/PREFIX/$PREFIX/g" "$INDEX_DEFINITIONS_ROOT/$RESOURCE_DEFINITION_NAME" | tee -a "$TMP_SOURCE_PATH" | ||
curl -s -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_URL/$RESOURCE_ADDRESS" -H 'Content-Type: application/json' --data "@$TMP_SOURCE_PATH" | ||
|
||
elif [ $RESOURCE_STATUS -eq 401 ] || [ $RESOURCE_STATUS -eq 405 ]; then | ||
echo -e "\n>>> failed to GET $RESOURCE_ADDRESS ($RESOURCE_STATUS) !" | ||
echo "... make sure you have correct USE_AWS_ELASTICSEARCH env value set (current=$USE_AWS_ELASTICSEARCH)" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why AWS specific? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not having However I can see this looks too specific for a general-purpose function. Let me add a condition checking whether |
||
exit 1 | ||
|
||
if [ $(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_ilm/policy/${PREFIX}datahub_usage_event_policy") -eq 404 ] | ||
then | ||
echo -e "\ncreating datahub_usage_event_policy" | ||
sed -e "s/PREFIX/${PREFIX}/g" /index/usage-event/policy.json | tee -a /tmp/policy.json | ||
curl -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_ilm/policy/${PREFIX}datahub_usage_event_policy" -H 'Content-Type: application/json' --data @/tmp/policy.json | ||
else | ||
echo -e "\ndatahub_usage_event_policy exists" | ||
fi | ||
if [ $(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_index_template/${PREFIX}datahub_usage_event_index_template") -eq 404 ] | ||
then | ||
echo -e "\ncreating datahub_usage_event_index_template" | ||
sed -e "s/PREFIX/${PREFIX}/g" /index/usage-event/index_template.json | tee -a /tmp/index_template.json | ||
curl -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_index_template/${PREFIX}datahub_usage_event_index_template" -H 'Content-Type: application/json' --data @/tmp/index_template.json | ||
else | ||
echo -e "\ndatahub_usage_event_index_template exists" | ||
echo -e "\n>>> unexpected $RESOURCE_ADDRESS status $RESOURCE_STATUS !" | ||
exit 1 | ||
fi | ||
} | ||
|
||
# create indices for ES (non-AWS) | ||
function create_datahub_usage_event_datastream() { | ||
create_if_not_exists "_ilm/policy/${PREFIX}datahub_usage_event_policy" policy.json | ||
create_if_not_exists "_index_template/${PREFIX}datahub_usage_event_index_template" index_template.json | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we not need a line like this here:
to ensure that the index is actually created for non aws cases? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because I do think there's a separate issue actively open where on fresh quickstarts without any usage events, the analytics index will be missing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure how this works outside of AWS. The behavior in non-AWS environment should be the same as before refactoring. Can you link the issue mentioned? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's this PR in question: #5974 |
||
} | ||
|
||
# create indices for ES OSS (AWS) | ||
function create_datahub_usage_event_aws_elasticsearch() { | ||
if [[ -z "$INDEX_PREFIX" ]]; then | ||
PREFIX='' | ||
else | ||
PREFIX="${INDEX_PREFIX}_" | ||
fi | ||
create_if_not_exists "_opendistro/_ism/policies/${PREFIX}datahub_usage_event_policy" aws_es_ism_policy.json | ||
create_if_not_exists "_template/${PREFIX}datahub_usage_event_index_template" aws_es_index_template.json | ||
|
||
if [ $(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_opendistro/_ism/policies/${PREFIX}datahub_usage_event_policy") -eq 404 ] | ||
then | ||
echo -e "\ncreating datahub_usage_event_policy" | ||
sed -e "s/PREFIX/${PREFIX}/g" /index/usage-event/aws_es_ism_policy.json | tee -a /tmp/aws_es_ism_policy.json | ||
curl -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_opendistro/_ism/policies/${PREFIX}datahub_usage_event_policy" -H 'Content-Type: application/json' --data @/tmp/aws_es_ism_policy.json | ||
else | ||
echo -e "\ndatahub_usage_event_policy exists" | ||
fi | ||
if [ $(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_template/${PREFIX}datahub_usage_event_index_template") -eq 404 ] | ||
then | ||
echo -e "\ncreating datahub_usage_event_index_template" | ||
sed -e "s/PREFIX/${PREFIX}/g" /index/usage-event/aws_es_index_template.json | tee -a /tmp/aws_es_index_template.json | ||
curl -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_template/${PREFIX}datahub_usage_event_index_template" -H 'Content-Type: application/json' --data @/tmp/aws_es_index_template.json | ||
curl -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/${PREFIX}datahub_usage_event-000001" -H 'Content-Type: application/json' --data "{\"aliases\":{\"${PREFIX}datahub_usage_event\":{\"is_write_index\":true}}}" | ||
else | ||
echo -e "\ndatahub_usage_event_index_template exists" | ||
# this fixes the case when datahub_usage_event was created by GMS before datahub_usage_event-000001 | ||
USAGE_EVENT_STATUS=$(curl -o /dev/null -s -w "%{http_code}\n" --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_URL/${PREFIX}datahub_usage_event") | ||
if [ $USAGE_EVENT_STATUS -eq 200 ]; then | ||
USAGE_EVENT_DEFINITION=$(curl -s --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_URL/${PREFIX}datahub_usage_event") | ||
# the definition is expected to contain "datahub_usage_event-000001" string | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is that? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (Consider adding to comment here) |
||
if [[ $USAGE_EVENT_DEFINITION != *"datahub_usage_event-000001"* ]]; then | ||
# ... if it doesn't, we need to drop it | ||
echo -e "\n>>> deleting invalid datahub_usage_event ..." | ||
curl -s -XDELETE --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_URL/${PREFIX}datahub_usage_event" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Deleting an index in the setup job seems dangerous. We need to be EXTREMELY careful that this doesn't affect existing deployments. Can you explain a bit more why this drop is necessary? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will analytics necessarily not work if the conditions is met to enter this block? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, if the index was previously created with This block is executed only if What could go wrong? Maybe if the name of the index is later changed and this condition is not adjusted appropriately. Let me extract it to a constant to be safe and add some more comments in the code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dropping is probably too harsh here, we have an alias. If it's a mapping that needs to be fixed it should use the reindex API rather than a full drop of the index. |
||
# ... and then recreate it below | ||
fi | ||
fi | ||
|
||
create_if_not_exists "${PREFIX}datahub_usage_event-000001" aws_es_usage_event.json | ||
} | ||
|
||
if [[ $DATAHUB_ANALYTICS_ENABLED == true ]]; then | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
jjoyce0510 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"aliases": { | ||
"PREFIXdatahub_usage_event": { | ||
"is_write_index": true | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
thank you!