Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CI] retry if environmental issues #22662

Closed
wants to merge 16 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 110 additions & 43 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@

@Library('apm@current') _

import groovy.transform.Field

/**
Stores the environmental issues detected during the build, keyed by id, so the affected commands can be retried.
*/
@Field def environmentalIssues = [:]

pipeline {
agent { label 'ubuntu-18 && immutable' }
environment {
Expand Down Expand Up @@ -160,6 +167,9 @@ VERSION=${env.VERSION}-SNAPSHOT""")
analyzeFlakey: !isTag(), flakyReportIdx: "reporter-beats-beats-${getIdSuffix()}")
}
}
always {
notifyEnvironmentalIssues()
}
}
}

Expand Down Expand Up @@ -465,12 +475,31 @@ def e2e(Map args = [:]) {
}
}

/**
* This method is a wrapper to run the runCommand method and retry if there are
* environmental issues. Therefore it passes the arguments to the runCommand.
* For further details regarding the arguments, please refer to the runCommand method.
*/
def target(Map args = [:]) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think about a more descriptive name? I acknowledge that references would need to be updated.

Suggested change
def target(Map args = [:]) {
def safeRunCommand(Map args = [:]) {

Alternatives?

  • runCommandWithEnviromentalIssues (longer)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did not intend to change the name at first, though it might be worth doing to add more clarity.

What do you think about runCommandWithRetry?

try {
runCommand(args)
} catch (err) {
if(environmentalIssues?.get(args.id, false)) {
// Retry if environmental issues
sleep randomNumber(min: 10, max: 30)
runCommand(args)
} else {
error("Error '${err.toString()}'")
}
}
}

/**
* This method runs the given command supporting two kind of scenarios:
* - make -C <folder> then the dir(location) is not required, aka by disabling isMage: false
* - mage then the dir(location) is required, aka by enabling isMage: true.
*/
def target(Map args = [:]) {
def runCommand(Map args = [:]) {
def command = args.command
def context = args.context
def directory = args.get('directory', '')
Expand Down Expand Up @@ -523,7 +552,8 @@ def withBeatsEnv(Map args = [:], Closure body) {
def archive = args.get('archive', true)
def withModule = args.get('withModule', false)
def directory = args.get('directory', '')

def environmentalIssue = true
def uploadGeneratedFiles = false // Skip the uploading of generated files by default.
def goRoot, path, magefile, pythonEnv, testResults, artifacts, gox_flags, userProfile

if(isUnix()) {
Expand Down Expand Up @@ -555,60 +585,97 @@ def withBeatsEnv(Map args = [:], Closure body) {
gox_flags = '-arch 386'
}

deleteDir()
unstashV2(name: 'source', bucket: "${JOB_GCS_BUCKET}", credentialsId: "${JOB_GCS_CREDENTIALS}")
// NOTE: This is required to run after the unstash
def module = withModule ? getCommonModuleInTheChangeSet(directory) : ''
withEnv([
"DOCKER_PULL=0",
"GOPATH=${env.WORKSPACE}",
"GOROOT=${goRoot}",
"GOX_FLAGS=${gox_flags}",
"HOME=${env.WORKSPACE}",
"MAGEFILE_CACHE=${magefile}",
"MODULE=${module}",
"PATH=${path}",
"PYTHON_ENV=${pythonEnv}",
"RACE_DETECTOR=true",
"TEST_COVERAGE=true",
"TEST_TAGS=${env.TEST_TAGS},oracle",
"OLD_USERPROFILE=${env.USERPROFILE}",
"USERPROFILE=${userProfile}"
]) {
if(isDockerInstalled()) {
dockerLogin(secret: "${DOCKER_ELASTIC_SECRET}", registry: "${DOCKER_REGISTRY}")
}
dir("${env.BASE_DIR}") {
installTools(args)
// Skip to upload the generated files by default.
def upload = false
try {
try {
deleteDir()
unstashV2(name: 'source', bucket: "${JOB_GCS_BUCKET}", credentialsId: "${JOB_GCS_CREDENTIALS}")
// NOTE: This is required to run after the unstash
def module = withModule ? getCommonModuleInTheChangeSet(directory) : ''
withEnv([
"DOCKER_PULL=0",
"GOPATH=${env.WORKSPACE}",
"GOROOT=${goRoot}",
"GOX_FLAGS=${gox_flags}",
"HOME=${env.WORKSPACE}",
"MAGEFILE_CACHE=${magefile}",
"MODULE=${module}",
"PATH=${path}",
"PYTHON_ENV=${pythonEnv}",
"RACE_DETECTOR=true",
"TEST_COVERAGE=true",
"TEST_TAGS=${env.TEST_TAGS},oracle",
"OLD_USERPROFILE=${env.USERPROFILE}",
"USERPROFILE=${userProfile}"
]) {
if(isDockerInstalled()) {
dockerLogin(secret: "${DOCKER_ELASTIC_SECRET}", registry: "${DOCKER_REGISTRY}")
}
dir("${env.BASE_DIR}") {
installTools(args)
// Pre-requisites to configure the environment were ok.
environmentalIssue = false
// Add more stability when dependencies are not accessible temporarily
// See https://github.com/elastic/beats/issues/21609
// retry/try/catch approach reports errors, let's avoid it to keep the
// notifications cleaner.
if (cmd(label: 'Download modules to local cache', script: 'go mod download', returnStatus: true) > 0) {
cmd(label: 'Download modules to local cache - retry', script: 'go mod download', returnStatus: true)
}
body()
} catch(err) {
// Upload the generated files ONLY if the step failed. This will avoid any overhead with Google Storage
upload = true
error("Error '${err.toString()}'")
} finally {
if (archive) {
archiveTestOutput(testResults: testResults, artifacts: artifacts, id: args.id, upload: upload)
}
// Tear down the setup for the permamnent workers.
catchError(buildResult: 'SUCCESS', stageResult: 'SUCCESS') {
fixPermissions("${WORKSPACE}")
deleteDir()
}
uploadGeneratedFiles = true
body()
// Skip the generated files by default.
uploadGeneratedFiles = false
}
}
} finally {
// If there are environmental issues then there is nothing to archive, so skip the archiving.
if (archive && !environmentalIssue) {
dir("${env.BASE_DIR}") {
archiveTestOutput(testResults: testResults, artifacts: artifacts, id: args.id, upload: uploadGeneratedFiles)
}
}
// Tear down the setup for the permanent workers.
catchError(buildResult: 'SUCCESS', stageResult: 'SUCCESS') {
fixPermissions("${WORKSPACE}")
deleteDir()
}
analyseEnvironmentalIssues(id: args.id, environmentalIssue: environmentalIssue)
}
}

/**
 * Records whether the work identified by args.id hit an environmental issue.
 *
 * The flag is stored in the script-level environmentalIssues map under args.id,
 * so it can be looked up later (e.g. when deciding on a retry or when building
 * the post-build report).
 *
 * For now the analysis is purely the boolean supplied by the caller.
 * NOTE(review): presumably that boolean reflects whether the agent set-up
 * (tool installation, workspace clean-up) completed before the build/test body
 * ran - confirm against withBeatsEnv.
 * In the future this method could also inspect the build logs for specific
 * patterns, such as failures to reach the docker registry or to pull images.
 *
 * @param args map with the keys:
 *             id                 - key under which the result is stored
 *             environmentalIssue - value to store (truthy when an issue happened)
 */
def analyseEnvironmentalIssues(args) {
  def issueId = args.id
  def hadEnvironmentalIssue = args.environmentalIssue
  // Subscript assignment (rather than put) keeps the assigned value as the implicit result.
  environmentalIssues[issueId] = hadEnvironmentalIssue
}

/**
* This method runs only as a post build action to notify when there are environmental issues.
* Archive the file with the environmental issues details in case there are any
* reported environmental issues.
* TODO: Create JSON file to upload those details to elasticsearch
v1v marked this conversation as resolved.
Show resolved Hide resolved
* This won't be needed as soon as we use the APM transactions
*/
def notifyEnvironmentalIssues(Map args = [:]) {
  // File written to the workspace and archived when at least one issue was recorded.
  def reportFile = 'environmental.issues.txt'
  stage('Notify environmental issues') {
    // Keep only the entries whose stored value is truthy.
    def flagged = environmentalIssues?.findAll { id, hasIssue -> hasIssue }
    // One line per flagged entry; an empty result yields an empty report.
    def report = flagged.collect { id, hasIssue ->
      "${id} failed with some environmental issues (${hasIssue})\n"
    }.join('')
    // Nothing is written or archived when there is no content to report.
    if (report?.trim()) {
      writeFile file: reportFile, text: report
      archiveArtifacts artifacts: reportFile
    }
  }
}


/**
* This method fixes the filesystem permissions after the build has happenend. The reason is to
* ensure any non-ephemeral workers don't have any leftovers that could cause some environmental
Expand Down