Skip to content

Commit

Permalink
feat(notify): Stop using the slack-notifier base (#33928)
Browse files Browse the repository at this point in the history
  • Loading branch information
chouetz authored Feb 13, 2025
1 parent 7c50b3f commit b62cc23
Show file tree
Hide file tree
Showing 11 changed files with 117 additions and 78 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
# internal_kubernetes_deploy stage
# Contains jobs to trigger a pipeline in our k8s-datadog-agent-ops repo

include:
- https://gitlab-templates.ddbuild.io/slack-notifier/v3-sdm/template.yml

internal_kubernetes_deploy_experimental:
stage: internal_kubernetes_deploy
rules:
Expand Down Expand Up @@ -68,7 +65,7 @@ internal_kubernetes_deploy_experimental:

notify-slack:
stage: internal_kubernetes_deploy
extends: .slack-notifier-base
image: registry.ddbuild.io/ci/datadog-agent-buildimages/deb_arm64$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES
rules:
- if: $FORCE_K8S_DEPLOYMENT == "true"
when: always
Expand All @@ -79,9 +76,10 @@ notify-slack:
- if: $APPS !~ "/^datadog-agent/"
when: never
- !reference [.on_deploy]
tags: ["arch:amd64"]
tags: ["arch:arm64"]
needs: ["internal_kubernetes_deploy_experimental"]
script:
- export SDM_JWT=$(vault read -field=token identity/oidc/token/sdm)
- python3 -m pip install -r tasks/requirements.txt
- python3 -m pip install -r tasks/requirements.txt -r tasks/libs/requirements-notifications.txt
- SLACK_API_TOKEN=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $SLACK_AGENT token) || exit $?; export SLACK_API_TOKEN
- inv pipeline.changelog ${CI_COMMIT_SHORT_SHA} || exit $?
35 changes: 16 additions & 19 deletions .gitlab/notify/notify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,35 @@
# notify stage
# Contains jobs which send notifications depending on pipeline status.

include:
- https://gitlab-templates.ddbuild.io/slack-notifier/v3-sdm/template.yml
# Shared setup commands, pulled into job scripts with `!reference [.notify_setup]`.
# Each secret is read with tools/ci/fetch_secret.sh and exported for the commands that follow;
# `|| exit $?` ends the script with the fetch script's exit status when a fetch fails.
.notify_setup:
# Exports the value fetched for $SLACK_AGENT as SLACK_API_TOKEN
- SLACK_API_TOKEN=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $SLACK_AGENT token) || exit $?; export SLACK_API_TOKEN
# $GITLAB_TOKEN is passed to fetch_secret.sh, then overwritten with the fetched value
- GITLAB_TOKEN=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $GITLAB_TOKEN read_api) || exit $?; export GITLAB_TOKEN
# Exports the value fetched for $AGENT_API_KEY_ORG2 as DD_API_KEY
- DD_API_KEY=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $AGENT_API_KEY_ORG2 token) || exit $?; export DD_API_KEY
# Installs the base requirements plus the notification-specific requirements
- python3 -m pip install -r requirements.txt -r tasks/libs/requirements-notifications.txt

notify-on-tagged-success:
extends: .slack-notifier-base
stage: notify
image: registry.ddbuild.io/ci/datadog-agent-buildimages/deb_arm64$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES
rules: !reference [.on_deploy_stable_or_beta_repo_branch]
dependencies: []
tags: ["arch:amd64"]
tags: ["arch:arm64"]
script: |
MESSAGE_TEXT=":host-green: Tagged build <$CI_PIPELINE_URL|$CI_PIPELINE_ID> succeeded.
*$CI_COMMIT_REF_NAME* is available in the staging repositories."
postmessage "#agent-release-sync" "$MESSAGE_TEXT"
python3 -m pip install -r requirements.txt -r tasks/libs/requirements-notifications.txt
SLACK_API_TOKEN=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $SLACK_AGENT token) || exit $?; export SLACK_API_TOKEN
invoke notify.post-message -c "#agent-release-sync" -m "$MESSAGE_TEXT"
notify:
extends: .slack-notifier-base
stage: notify
image: registry.ddbuild.io/ci/datadog-agent-buildimages/deb_arm64$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES
rules: !reference [.on_main_or_release_branch_or_deploy_always]
dependencies: []
tags: ["arch:amd64"]
tags: ["arch:arm64"]
resource_group: notification
timeout: 15 minutes # Added to prevent a stuck job blocking the resource_group defined above
script:
- GITLAB_TOKEN=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $GITLAB_TOKEN read_api) || exit $?; export GITLAB_TOKEN
- DD_API_KEY=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $AGENT_API_KEY_ORG2 token) || exit $?; export DD_API_KEY
- python3 -m pip install -r requirements.txt -r tasks/libs/requirements-notifications.txt
- !reference [.notify_setup]
- invoke -e notify.send-message -p $CI_PIPELINE_ID
- invoke -e notify.check-consistent-failures -p $CI_PIPELINE_ID

Expand All @@ -38,8 +41,7 @@ send_pipeline_stats:
when: always
dependencies: []
script:
- GITLAB_TOKEN=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $GITLAB_TOKEN read_api) || exit $?; export GITLAB_TOKEN
- DD_API_KEY=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $AGENT_API_KEY_ORG2 token) || exit $?; export DD_API_KEY
- !reference [.notify_setup]
- invoke -e notify.send-stats

notify_github:
Expand Down Expand Up @@ -105,11 +107,6 @@ notify_gitlab_ci_changes:
resource_group: notification
timeout: 15 minutes # Added to prevent a stuck job blocking the resource_group defined above

.failure_summary_setup:
- SLACK_API_TOKEN=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $SLACK_AGENT token) || exit $?; export SLACK_API_TOKEN
- GITLAB_TOKEN=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $GITLAB_TOKEN read_api) || exit $?; export GITLAB_TOKEN
- DD_API_KEY=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $AGENT_API_KEY_ORG2 token) || exit $?; export DD_API_KEY
- python3 -m pip install -r requirements.txt -r tasks/libs/requirements-notifications.txt

# Upload failure summary data to S3 at the end of each main pipeline
notify_failure_summary_on_pipeline:
Expand All @@ -119,7 +116,7 @@ notify_failure_summary_on_pipeline:
when: never
- !reference [.on_main_always]
script:
- !reference [.failure_summary_setup]
- !reference [.notify_setup]
- inv -e notify.failure-summary-upload-pipeline-data

# Send failure summary notifications daily and weekly
Expand All @@ -130,7 +127,7 @@ notify_failure_summary_daily:
when: never
- !reference [.on_deploy_nightly_repo_branch_always]
script:
- !reference [.failure_summary_setup]
- !reference [.notify_setup]
- weekday="$(date --utc '+%A')"
- |
if [ "$weekday" = "Sunday" ] || [ "$weekday" = "Monday" ]; then
Expand Down
13 changes: 6 additions & 7 deletions .gitlab/trigger_release/trigger_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,6 @@ trigger_manual_prod_release_on_failure:
when: never
- !reference [.on_deploy_stable_on_failure]

include:
- https://gitlab-templates.ddbuild.io/slack-notifier/v3-sdm/template.yml

.setup_github_app_agent_platform_auto_pr:
# GitHub App rate limits are per-app. Since this job is rarely called, we only use instance 2
- |
Expand All @@ -90,9 +87,9 @@ include:
generate_windows_gitlab_runner_bump_pr:
stage: trigger_release
extends: .slack-notifier-base
image: registry.ddbuild.io/ci/datadog-agent-buildimages/deb_arm64$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES
needs: ["trigger_auto_staging_release"]
tags: ["arch:amd64"]
tags: ["arch:arm64"]
rules:
- if: $DDR_WORKFLOW_ID != null
when: never
Expand All @@ -105,14 +102,15 @@ generate_windows_gitlab_runner_bump_pr:
- !reference [.setup_github_app_agent_platform_auto_pr]
- python3 -m pip install -r requirements.txt -r tasks/libs/requirements-notifications.txt
- $S3_CP_CMD $S3_ARTIFACTS_URI/agent-version.cache .
- SLACK_API_TOKEN=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $SLACK_AGENT token) || exit $?; export SLACK_API_TOKEN
- inv -e github.update-windows-runner-version

# Manual job to generate the gitlab bump pr on buildenv if trigger_auto_staging_release fails
generate_windows_gitlab_runner_bump_pr_manual:
stage: trigger_release
extends: .slack-notifier-base
image: registry.ddbuild.io/ci/datadog-agent-buildimages/deb_arm64$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES
needs: ["trigger_auto_staging_release"]
tags: ["arch:amd64"]
tags: ["arch:arm64"]
rules:
- if: $DDR_WORKFLOW_ID != null
when: never
Expand All @@ -125,4 +123,5 @@ generate_windows_gitlab_runner_bump_pr_manual:
- !reference [.setup_github_app_agent_platform_auto_pr]
- python3 -m pip install -r requirements.txt -r tasks/libs/requirements-notifications.txt
- $S3_CP_CMD $S3_ARTIFACTS_URI/agent-version.cache .
- SLACK_API_TOKEN=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $SLACK_AGENT token) || exit $?; export SLACK_API_TOKEN
- inv -e github.update-windows-runner-version
6 changes: 4 additions & 2 deletions tasks/github_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from tasks.libs.common.datadog_api import create_gauge, send_event, send_metrics
from tasks.libs.common.git import get_default_branch
from tasks.libs.common.utils import get_git_pretty_ref
from tasks.libs.notify.pipeline_status import send_slack_message
from tasks.libs.owners.linter import codeowner_has_orphans, directory_has_packages_without_owner
from tasks.libs.owners.parsing import read_owners
from tasks.libs.pipeline.notifications import GITHUB_SLACK_MAP
Expand Down Expand Up @@ -143,7 +142,10 @@ def _update_windows_runner_version(new_version=None, buildenv_ref="master"):

message = f":robobits: A new windows-runner bump PR to {new_version} has been generated. Please take a look :frog-review:\n:pr: {PR_URL} :ty:"

send_slack_message("ci-infra-support", message)
from slack_sdk import WebClient

client = WebClient(token=os.environ["SLACK_API_TOKEN"])
client.chat_postMessage(channel="ci-infra-support", text=message)
return workflow_conclusion


Expand Down
6 changes: 4 additions & 2 deletions tasks/libs/notify/alerts.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from tasks.libs.pipeline.data import get_failed_jobs
from tasks.libs.pipeline.notifications import (
get_pr_from_commit,
send_slack_message,
)
from tasks.owners import channel_owners, make_partition

Expand Down Expand Up @@ -232,7 +231,10 @@ def send_alert(channel, consecutive: ConsecutiveJobAlert, cumulative: Cumulative
message = message.strip()

if message:
send_slack_message(channel, message)
from slack_sdk import WebClient

client = WebClient(token=os.environ["SLACK_API_TOKEN"])
client.chat_postMessage(channel=channel, text=message)

# Create metrics for consecutive and cumulative alerts
return [
Expand Down
23 changes: 17 additions & 6 deletions tasks/libs/notify/pipeline_status.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import os
import re
import sys

from tasks.libs.ciproviders.gitlab_api import get_commit, get_pipeline
from tasks.libs.common.git import get_default_branch
from tasks.libs.common.utils import Color, color_message
from tasks.libs.notify.utils import DEPLOY_PIPELINES_CHANNEL, PIPELINES_CHANNEL, PROJECT_NAME, get_pipeline_type
from tasks.libs.pipeline.data import get_failed_jobs
from tasks.libs.pipeline.notifications import (
base_message,
email_to_slackid,
get_failed_tests,
send_slack_message,
)
from tasks.libs.types.types import SlackMessage

Expand Down Expand Up @@ -58,16 +59,26 @@ def send_message(ctx, pipeline_id, dry_run):
for test in get_failed_tests(PROJECT_NAME, job):
message.add_test_failure(test, job)

# Send messages
# Send message
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError

client = WebClient(token=os.environ["SLACK_API_TOKEN"])
if dry_run:
print(f"Would send to {slack_channel}:\n{str(message)}")
else:
send_slack_message(slack_channel, str(message))
client.chat_postMessage(channel=slack_channel, text=str(message))

if should_send_message_to_author(pipeline.ref, get_default_branch()):
author_email = commit.author_email
if dry_run:
print(f"Would send to {author_email}:\n{str(message)}")
else:
recipient = email_to_slackid(ctx, author_email)
send_slack_message(recipient, str(message))
try:
recipient = client.users_lookupByEmail(email=author_email)
client.chat_postMessage(channel=recipient.data['user']['id'], text=str(message))
except SlackApiError as e:
print(
f"[{color_message('ERROR', Color.RED)}] Failed to send message to {author_email}: {e.response['error']}",
file=sys.stderr,
)
19 changes: 4 additions & 15 deletions tasks/libs/pipeline/notifications.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@
import os
import pathlib
import re
import subprocess
from collections import defaultdict
from datetime import datetime, timezone

import gitlab
import yaml
from gitlab.v4.objects import ProjectCommit, ProjectJob, ProjectPipeline
from invoke.context import Context

from tasks.libs.ciproviders.gitlab_api import get_gitlab_repo
from tasks.libs.owners.parsing import read_owners
Expand Down Expand Up @@ -157,6 +155,9 @@ def base_message(project_name: str, pipeline: ProjectPipeline, commit: ProjectCo
commit_url_github = f"{GITHUB_BASE_URL}/{project_name}/commit/{commit.id}"
commit_short_sha = commit.id[-8:]
author = commit.author_name
finish = datetime.fromisoformat(pipeline.finished_at) if pipeline.finished_at else datetime.now(timezone.utc)
delta = finish - datetime.fromisoformat(pipeline.started_at)
duration = f"[:hourglass: {int(delta.total_seconds() / 60)} min]"

# Try to find a PR id (e.g #12345) in the commit title and add a link to it in the message if found.
pr_info = get_pr_from_commit(commit_title, project_name)
Expand All @@ -165,22 +166,10 @@ def base_message(project_name: str, pipeline: ProjectPipeline, commit: ProjectCo
parsed_pr_id, pr_url_github = pr_info
enhanced_commit_title = enhanced_commit_title.replace(f"#{parsed_pr_id}", f"<{pr_url_github}|#{parsed_pr_id}>")

return f"""{header} pipeline <{pipeline_url}|{pipeline_id}> for {commit_ref_name} {state}.
return f"""{header} pipeline <{pipeline_url}|{pipeline_id}> for {commit_ref_name} {state} {duration}.
{enhanced_commit_title} (<{commit_url_gitlab}|{commit_short_sha}>)(:github: <{commit_url_github}|link>) by {author}"""


def send_slack_message(recipient, message):
    """
    Run the external `postmessage` command with `recipient` and `message` as its arguments.

    Raises subprocess.CalledProcessError when the command exits with a non-zero status.
    """
    postmessage_cmd = ["postmessage", recipient, message]
    subprocess.run(postmessage_cmd, check=True)


def email_to_slackid(ctx: Context, email: str) -> str:
    """
    Resolve an email address to a Slack user id with the external `email2slackid` command.

    Args:
        ctx: invoke context used to run the shell pipeline.
        email: address to look up; it is written to the command's standard input.

    Returns:
        The command's standard output with surrounding whitespace stripped.

    Raises:
        AssertionError: when the command prints nothing (address not found).
    """
    # Function-scope import so the block needs no new file-level import
    import shlex

    # The address may come from commit metadata, so it must be shell-quoted:
    # hand-written single quotes break as soon as the address itself contains a quote
    lookup = ctx.run(f"echo {shlex.quote(email)} | email2slackid", hide=True, warn=True)
    slackid = lookup.stdout.strip()

    # Raised explicitly (same exception type as before) so the check survives `python -O`
    if slackid == '':
        raise AssertionError('Email not found')

    return slackid


def warn_new_commits(release_managers, team, branch, next_rc):
from slack_sdk import WebClient

Expand Down
11 changes: 11 additions & 0 deletions tasks/notify.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,3 +277,14 @@ def close_failing_tests_stale_issues(_, dry_run=False):
print(f'Error closing issue {issue["key"]}: {e}', file=sys.stderr)

print(f'Closed {n_closed} issues without failing tests')


@task
def post_message(_: Context, channel: str, message: str):
    """
    Send `message` to the Slack channel `channel`.

    The Slack API token is read from the SLACK_API_TOKEN environment variable.
    """
    # Function-scope import: slack_sdk is loaded only when this task runs
    from slack_sdk import WebClient

    slack_token = os.environ['SLACK_API_TOKEN']
    WebClient(token=slack_token).chat_postMessage(channel=channel, text=message)
8 changes: 5 additions & 3 deletions tasks/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
is_allowed_repo_branch,
)
from tasks.libs.owners.parsing import read_owners
from tasks.libs.pipeline.notifications import send_slack_message
from tasks.libs.pipeline.tools import (
FilteredOutException,
cancel_pipelines_with_confirmation,
Expand Down Expand Up @@ -475,6 +474,9 @@ def is_system_probe(owners, files):

@task
def changelog(ctx, new_commit_sha):
from slack_sdk import WebClient

client = WebClient(token=os.environ["SLACK_API_TOKEN"])
# Environment variable to deal with both local and CI environments
if "CI_PROJECT_DIR" in os.environ:
parent_dir = os.environ["CI_PROJECT_DIR"]
Expand Down Expand Up @@ -502,7 +504,7 @@ def changelog(ctx, new_commit_sha):
if old_commit_sha == new_commit_sha:
print("No new commits found, exiting")
slack_message += no_commits_msg
send_slack_message("system-probe-ops", slack_message)
client.chat_postMessage(channel="system-probe-ops", text=slack_message)
return

print(f"Generating changelog for commit range {old_commit_sha} to {new_commit_sha}")
Expand Down Expand Up @@ -543,7 +545,7 @@ def changelog(ctx, new_commit_sha):
slack_message += empty_changelog_msg

print(f"Posting message to slack: \n {slack_message}")
send_slack_message("system-probe-ops", slack_message)
client.chat_postMessage(channel="system-probe-ops", text=slack_message)
print(f"Writing new commit sha: {new_commit_sha} to SSM")
res = ctx.run(
f"aws ssm put-parameter --name ci.datadog-agent.gitlab_changelog_commit_sha --value {new_commit_sha} "
Expand Down
Loading

0 comments on commit b62cc23

Please sign in to comment.