From 3f2898b92d0416183be89c01e77904d139d94680 Mon Sep 17 00:00:00 2001
From: Olga Naidjonoka
Date: Wed, 2 Oct 2024 11:50:20 +0300
Subject: [PATCH 01/18] adjusted pipeline
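
Adds a helper script that downloads the packaged agentbeat artifact and
installs @elastic/synthetics. One caveat worth noting about the script as
committed (a sketch, not part of the patch): subprocess.run() treats each
list element as one argv entry, so the single-element list used for npm
below makes Python look for an executable literally named
"npm install -g @elastic/synthetics"; the split form is what a later patch
in this series switches to.

    import subprocess

    # Fails: searches PATH for an executable literally named
    # "npm install -g @elastic/synthetics"
    # subprocess.run(["npm install -g @elastic/synthetics"], check=True)

    # Works: each argument is its own argv element
    subprocess.run(["npm", "install", "-g", "@elastic/synthetics"], check=True)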
---
 .buildkite/scripts/agentbeat/prepare_env.py    |  57 +++++
 .../x-pack/pipeline.xpack.agentbeat.yml        | 202 +++++++++++-------
 2 files changed, 184 insertions(+), 75 deletions(-)
 create mode 100755 .buildkite/scripts/agentbeat/prepare_env.py

diff --git a/.buildkite/scripts/agentbeat/prepare_env.py b/.buildkite/scripts/agentbeat/prepare_env.py
new file mode 100755
index 000000000000..50f7d8123299
--- /dev/null
+++ b/.buildkite/scripts/agentbeat/prepare_env.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+
+import os
+import platform
+import subprocess
+
+
+def get_os() -> str:
+    return platform.system()
+
+
+def get_arch() -> str:
+    return platform.machine()
+
+
+def get_cwd() -> str:
+    return os.getcwd()
+
+
+def download_agentbeat_artifact(os, arch):
+    pattern = "x-pack/agentbeat/build/distributions/agentbeat-9.0.0-SNAPSHOT-linux-x86_64.tar.gz"
+    # pattern = "x-pack/agentbeat/build/distributions/**"
+    # command = f"buildkite-agent artifact download \"{pattern}\" . --step 'agentbeat-package-linux'"
+
+    try:
+        print("--- Downloading agentbeat artifact")
+        result = subprocess.run(
+            ["buildkite-agent", "artifact", "download", pattern, ".",
+             "--build", "01924d2b-b061-45ae-a106-e885584ff26f",
+             "--step", "agentbeat-package-linux"],
+            check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        print(result.stdout.decode())
+    except subprocess.CalledProcessError as e:
+        print("--- Error occurred while downloading agentbeat\n" + e.stderr)
+        exit(1)
+
+
+def install_synthetics():
+    try:
+        print("--- Installing @elastic/synthetics")
+        subprocess.run(
+            ["npm install -g @elastic/synthetics"],
+            check=True
+        )
+    except subprocess.CalledProcessError:
+        print("Failed to install @elastic/synthetics")
+        exit(1)
+
+
+# print("--- OS: " + get_os())
+#
+# print("--- ARCH: " + get_arch())
+#
+# print("--- CWD: " + get_cwd())
+
+download_agentbeat_artifact(get_os(), get_arch())
+# install_synthetics()
diff --git a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
index ef7cb1598aa4..0af413435cbc 100644
--- a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
+++ b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
@@ -1,101 +1,153 @@
 env:
   ASDF_MAGE_VERSION: 1.15.0
+  ASDF_NODEJS_VERSION: 18.17.1
+
   GCP_HI_PERF_MACHINE_TYPE: "c2d-highcpu-16"
   IMAGE_UBUNTU_X86_64: "family/platform-ingest-beats-ubuntu-2204"
+  IMAGE_MACOS_ARM: "generic-13-ventura-arm"
+  IMAGE_MACOS_X86_64: "generic-13-ventura-x64"
+
+  IMAGE_WIN_2022: "family/platform-ingest-beats-windows-2022"
+
   IMAGE_BEATS_WITH_HOOKS_LATEST: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-beats-ci-with-hooks:latest"
 
 steps:
-  - group: "Check/Update"
-    key: "x-pack-agentbeat-check-update"
-
-    steps:
-      - label: "agentbeat: Run pre-commit"
-        command: "pre-commit run --all-files"
-        agents:
-          image: "${IMAGE_BEATS_WITH_HOOKS_LATEST}"
-          memory: "2Gi"
-        useCustomGlobalHooks: true
-        notify:
-          - github_commit_status:
-              context: "agentbeat: pre-commit"
+#  - group: "Check/Update"
+#    key: "x-pack-agentbeat-check-update"
+#
+#    steps:
+#      - label: "agentbeat: Run pre-commit"
+#        command: "pre-commit run --all-files"
+#        agents:
+#          image: "${IMAGE_BEATS_WITH_HOOKS_LATEST}"
+#          memory: "2Gi"
+#        useCustomGlobalHooks: true
+#        notify:
+#          - github_commit_status:
+#              context: "agentbeat: pre-commit"
 
-  - wait: ~
-    # with PRs, we want to run mandatory tests only if check/update step succeeds
-    # for other cases, e.g. merge commits, we want to run mandatory tests (and publish) independently of other tests
-    # this allows building DRA artifacts even if there is flakiness in check/update step
-    if: build.env("BUILDKITE_PULL_REQUEST") != "false"
-    depends_on: "x-pack-agentbeat-check-update"
+#  - wait: ~
+#    # with PRs, we want to run mandatory tests only if check/update step succeeds
+#    # for other cases, e.g. merge commits, we want to run mandatory tests (and publish) independently of other tests
+#    # this allows building DRA artifacts even if there is flakiness in check/update step
+#    if: build.env("BUILDKITE_PULL_REQUEST") != "false"
+#    depends_on: "x-pack-agentbeat-check-update"
 
   - group: "Agentbeat tests"
     key: "agentbeat-mandatory-tests"
 
     steps:
-      - label: ":linux: Agentbeat packaging Linux"
-        key: "agentbeat-package-linux"
-        env:
-          PLATFORMS: "+all linux/amd64 linux/arm64 windows/amd64 darwin/amd64 darwin/arm64"
-          SNAPSHOT: true
-        command: |
-          set -euo pipefail
-          cd x-pack/agentbeat
-          mage package
-        artifact_paths:
-          - x-pack/agentbeat/build/distributions/**/*
-          - "x-pack/agentbeat/build/*.xml"
-          - "x-pack/agentbeat/build/*.json"
-        retry:
-          automatic:
-            - limit: 2
-        timeout_in_minutes: 60
-        agents:
-          provider: "gcp"
-          image: "${IMAGE_UBUNTU_X86_64}"
-          machineType: "${GCP_HI_PERF_MACHINE_TYPE}"
-          disk_size: 100
-          disk_type: "pd-ssd"
-        notify:
-          - github_commit_status:
-              context: "agentbeat: Packaging"
+#      - label: ":linux: Agentbeat packaging Linux"
+#        key: "agentbeat-package-linux"
+#        env:
+#          PLATFORMS: "+all linux/amd64 linux/arm64 windows/amd64 darwin/amd64 darwin/arm64"
+#          SNAPSHOT: true
+#        command: |
+#          set -euo pipefail
+#          cd x-pack/agentbeat
+#          mage package
+#        artifact_paths:
+#          - x-pack/agentbeat/build/distributions/**/*
+#          - "x-pack/agentbeat/build/*.xml"
+#          - "x-pack/agentbeat/build/*.json"
+#        retry:
+#          automatic:
+#            - limit: 2
+#        timeout_in_minutes: 60
+#        agents:
+#          provider: "gcp"
+#          image: "${IMAGE_UBUNTU_X86_64}"
+#          machineType: "${GCP_HI_PERF_MACHINE_TYPE}"
+#          disk_size: 100
+#          disk_type: "pd-ssd"
+#        notify:
+#          - github_commit_status:
+#              context: "agentbeat: Packaging"
+
+#      - label: ":linux: Agentbeat/Integration tests Linux"
+#        key: "agentbeat-it-linux"
+#        depends_on:
+#          - agentbeat-package-linux
+#        env:
+#          ASDF_NODEJS_VERSION: 18.17.1
+#          PLATFORMS: "+all linux/amd64 linux/arm64 windows/amd64 darwin/amd64 darwin/arm64"
+#          SNAPSHOT: true
+#        command: |
+#          set -euo pipefail
+#          echo "~~~ Downloading artifacts"
+#          buildkite-agent artifact download x-pack/agentbeat/build/distributions/** . --step 'agentbeat-package-linux'
+#          ls -lah x-pack/agentbeat/build/distributions/
+#          echo "~~~ Installing @elastic/synthetics with npm"
+#          npm install -g @elastic/synthetics
+#          echo "~~~ Running tests"
+#          cd x-pack/agentbeat
+#          mage goIntegTest
+#        artifact_paths:
+#          - x-pack/agentbeat/build/distributions/**/*
+#          - "x-pack/agentbeat/build/*.xml"
+#          - "x-pack/agentbeat/build/*.json"
+#        retry:
+#          automatic:
+#            - limit: 1
+#        timeout_in_minutes: 60
+#        agents:
+#          provider: "gcp"
+#          image: "${IMAGE_UBUNTU_X86_64}"
+#          machineType: "${GCP_HI_PERF_MACHINE_TYPE}"
+#          disk_size: 100
+#          disk_type: "pd-ssd"
+#        notify:
+#          - github_commit_status:
+#              context: "agentbeat: Integration tests"
 
       - label: ":linux: Agentbeat/Integration tests Linux"
         key: "agentbeat-it-linux"
-        depends_on:
-          - agentbeat-package-linux
-        env:
-          ASDF_NODEJS_VERSION: 18.17.1
-          PLATFORMS: "+all linux/amd64 linux/arm64 windows/amd64 darwin/amd64 darwin/arm64"
-          SNAPSHOT: true
+#        depends_on:
+#          - agentbeat-package-linux
         command: |
           set -euo pipefail
-          echo "~~~ Downloading artifacts"
-          buildkite-agent artifact download x-pack/agentbeat/build/distributions/** . --step 'agentbeat-package-linux'
+          # buildkite-agent artifact download x-pack/agentbeat/build/distributions/** . --build 01924d2b-b061-45ae-a106-e885584ff26f --step agentbeat-package-linux
+          ./.buildkite/scripts/agentbeat/prepare_env.py
           ls -lah x-pack/agentbeat/build/distributions/
-          echo "~~~ Installing @elastic/synthetics with npm"
-          npm install -g @elastic/synthetics
-          echo "~~~ Running tests"
-          cd x-pack/agentbeat
-          mage goIntegTest
-        artifact_paths:
-          - x-pack/agentbeat/build/distributions/**/*
-          - "x-pack/agentbeat/build/*.xml"
-          - "x-pack/agentbeat/build/*.json"
-        plugins:
-          - test-collector#v1.10.2:
-              files: "x-pack/agentbeat/build/TEST-*.xml"
-              format: "junit"
-              branches: "main"
-              debug: true
-        retry:
-          automatic:
-            - limit: 1
-        timeout_in_minutes: 60
         agents:
           provider: "gcp"
           image: "${IMAGE_UBUNTU_X86_64}"
           machineType: "${GCP_HI_PERF_MACHINE_TYPE}"
           disk_size: 100
           disk_type: "pd-ssd"
-        notify:
-          - github_commit_status:
-              context: "agentbeat: Integration tests"
+
+#      - label: ":linux: Agentbeat/Integration tests Windows"
+#        key: "agentbeat-it-windows"
+#        depends_on:
+#          - agentbeat-package-linux
+#        commands:
+#          - ./.buildkite/scripts/agentbeat/prepare_env.py
+#        agents:
+#          provider: "gcp"
+#          image: "${IMAGE_WIN_2022}"
+#          machine_type: "${GCP_WIN_MACHINE_TYPE}"
+#          disk_size: 200
+#          disk_type: "pd-ssd"
+#
+#      - label: ":linux: Agentbeat/Integration tests macOS"
+#        key: "agentbeat-it-macos"
+#        depends_on:
+#          - agentbeat-package-linux
+#        command: |
+#          set -euo pipefail
+#          ./.buildkite/scripts/agentbeat/prepare_env.py
+#        agents:
+#          provider: "orka"
+#          imagePrefix: "${IMAGE_MACOS_X86_64}"
+#
+#      - label: ":linux: Agentbeat/Integration tests macOS arm64"
+#        key: "agentbeat-it-macos-arm"
+#        depends_on:
+#          - agentbeat-package-linux
+#        command: |
+#          set -euo pipefail
+#          ./.buildkite/scripts/agentbeat/prepare_env.py
+#        agents:
+#          provider: "orka"
+#          imagePrefix: "${IMAGE_MACOS_ARM}"

From 7c32b3f86c007503d13527c9d7b5fdab6034f931 Mon Sep 17 00:00:00 2001
From: Olga Naidjonoka
Date: Wed, 2 Oct 2024 17:55:27 +0300
Subject: [PATCH 02/18] updated artifact pattern
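
The artifact name is now derived from the host instead of being hard-coded.
A sketch of the mapping this patch introduces (example values are
illustrative; Linux typically reports machine() as 'x86_64', Windows often
as 'amd64'):

    import platform

    agent_os = platform.system().lower()    # e.g. 'linux', 'windows', 'darwin'
    arch = platform.machine().lower()       # e.g. 'x86_64', 'amd64', 'arm64'
    agent_arch = 'x86_64' if arch == 'amd64' else arch
    pattern = (f"x-pack/agentbeat/build/distributions/"
               f"agentbeat-*-{agent_os}-{agent_arch}.tar.gz")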
---
 .buildkite/scripts/agentbeat/prepare_env.py    | 42 +++++++++----------
 .../x-pack/pipeline.xpack.agentbeat.yml        | 22 +++++-----
 2 files changed, 30 insertions(+), 34 deletions(-)

diff --git a/.buildkite/scripts/agentbeat/prepare_env.py b/.buildkite/scripts/agentbeat/prepare_env.py
index 50f7d8123299..11d11bc56cda 100755
--- a/.buildkite/scripts/agentbeat/prepare_env.py
+++ b/.buildkite/scripts/agentbeat/prepare_env.py
@@ -1,43 +1,47 @@
 #!/usr/bin/env python3
 
-import os
 import platform
 import subprocess
+import sys
 
 
 def get_os() -> str:
-    return platform.system()
+    return platform.system().lower()
 
 
 def get_arch() -> str:
-    return platform.machine()
+    arch = platform.machine().lower()
 
+    if arch == "amd64":
+        return "x86_64"
+    else:
+        return arch
 
-def get_cwd() -> str:
-    return os.getcwd()
 
+def download_agentbeat_artifact(agent_os, agent_arch):
+    pattern = f"x-pack/agentbeat/build/distributions/agentbeat-*-{agent_os}-{agent_arch}.tar.gz"
 
-def download_agentbeat_artifact(os, arch):
-    pattern = "x-pack/agentbeat/build/distributions/agentbeat-9.0.0-SNAPSHOT-linux-x86_64.tar.gz"
-    # pattern = "x-pack/agentbeat/build/distributions/**"
-    # command = f"buildkite-agent artifact download \"{pattern}\" . --step 'agentbeat-package-linux'"
+    print("--- Downloading agentbeat artifact")
 
     try:
-        print("--- Downloading agentbeat artifact")
-        result = subprocess.run(
+        subprocess.run(
             ["buildkite-agent", "artifact", "download", pattern, ".",
              "--build", "01924d2b-b061-45ae-a106-e885584ff26f",
              "--step", "agentbeat-package-linux"],
-            check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        print(result.stdout.decode())
+            check=True, stdout=sys.stdout, stderr=subprocess.PIPE, text=True)
     except subprocess.CalledProcessError as e:
-        print("--- Error occurred while downloading agentbeat\n" + e.stderr)
+        print("--- Error occurred. Failed to download agentbeat: \n" + e.stderr)
         exit(1)
 
 
+def unzip_agentbeat():
+    print("todo unzip")
+
+
 def install_synthetics():
+    print("--- Installing @elastic/synthetics")
+
     try:
-        print("--- Installing @elastic/synthetics")
         subprocess.run(
             ["npm install -g @elastic/synthetics"],
             check=True
@@ -46,12 +50,6 @@ def install_synthetics():
         print("Failed to install @elastic/synthetics")
         exit(1)
 
-
-# print("--- OS: " + get_os())
-#
-# print("--- ARCH: " + get_arch())
-#
-# print("--- CWD: " + get_cwd())
-
+print("--- OS Data: " + get_os() + " " + get_arch())
 download_agentbeat_artifact(get_os(), get_arch())
 # install_synthetics()
diff --git a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
index 0af413435cbc..cfed290ffba7 100644
--- a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
+++ b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
@@ -107,9 +107,7 @@ steps:
 #          - agentbeat-package-linux
         command: |
           set -euo pipefail
-          # buildkite-agent artifact download x-pack/agentbeat/build/distributions/** . --build 01924d2b-b061-45ae-a106-e885584ff26f --step agentbeat-package-linux
           ./.buildkite/scripts/agentbeat/prepare_env.py
-          ls -lah x-pack/agentbeat/build/distributions/
         agents:
           provider: "gcp"
           image: "${IMAGE_UBUNTU_X86_64}"
@@ -117,18 +115,18 @@ steps:
           disk_size: 100
           disk_type: "pd-ssd"
 
-#      - label: ":linux: Agentbeat/Integration tests Windows"
-#        key: "agentbeat-it-windows"
+      - label: ":linux: Agentbeat/Integration tests Windows"
+        key: "agentbeat-it-windows"
 #        depends_on:
 #          - agentbeat-package-linux
-#        commands:
-#          - ./.buildkite/scripts/agentbeat/prepare_env.py
-#        agents:
-#          provider: "gcp"
-#          image: "${IMAGE_WIN_2022}"
-#          machine_type: "${GCP_WIN_MACHINE_TYPE}"
-#          disk_size: 200
-#          disk_type: "pd-ssd"
+        commands:
+          - ./.buildkite/scripts/agentbeat/prepare_env.py
+        agents:
+          provider: "gcp"
+          image: "${IMAGE_WIN_2022}"
+          machine_type: "${GCP_WIN_MACHINE_TYPE}"
+          disk_size: 200
+          disk_type: "pd-ssd"
 #
 #      - label: ":linux: Agentbeat/Integration tests macOS"

From db2fc5ce2a7ed7cb0e6512848de47465cbec130e Mon Sep 17 00:00:00 2001
From: Olga Naidjonoka
Date: Thu, 3 Oct 2024 12:35:46 +0300
Subject: [PATCH 03/18] added stderr

---
 .buildkite/scripts/agentbeat/prepare_env.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.buildkite/scripts/agentbeat/prepare_env.py b/.buildkite/scripts/agentbeat/prepare_env.py
index 11d11bc56cda..028f76476fbd 100755
--- a/.buildkite/scripts/agentbeat/prepare_env.py
+++ b/.buildkite/scripts/agentbeat/prepare_env.py
@@ -28,9 +28,9 @@ def download_agentbeat_artifact(agent_os, agent_arch):
             ["buildkite-agent", "artifact", "download", pattern, ".",
              "--build", "01924d2b-b061-45ae-a106-e885584ff26f",
              "--step", "agentbeat-package-linux"],
-            check=True, stdout=sys.stdout, stderr=subprocess.PIPE, text=True)
+            check=True, stdout=sys.stdout, stderr=sys.stderr, text=True)
     except subprocess.CalledProcessError as e:
-        print("--- Error occurred. Failed to download agentbeat: \n" + e.stderr)
+        print("Error occurred. Failed to download agentbeat: \n" + e.stderr)
         exit(1)

From 64c5ae125d607ea2001cfa5825b6a049fc997420 Mon Sep 17 00:00:00 2001
From: Olga Naidjonoka
Date: Thu, 3 Oct 2024 12:42:42 +0300
Subject: [PATCH 04/18] added stderr

---
 .buildkite/scripts/agentbeat/prepare_env.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.buildkite/scripts/agentbeat/prepare_env.py b/.buildkite/scripts/agentbeat/prepare_env.py
index 028f76476fbd..c3dbfd0977dd 100755
--- a/.buildkite/scripts/agentbeat/prepare_env.py
+++ b/.buildkite/scripts/agentbeat/prepare_env.py
@@ -28,14 +28,14 @@ def download_agentbeat_artifact(agent_os, agent_arch):
             ["buildkite-agent", "artifact", "download", pattern, ".",
              "--build", "01924d2b-b061-45ae-a106-e885584ff26f",
              "--step", "agentbeat-package-linux"],
-            check=True, stdout=sys.stdout, stderr=sys.stderr, text=True)
+            check=True, stdout=sys.stdout, stderr=subprocess.PIPE, text=True)
     except subprocess.CalledProcessError as e:
         print("Error occurred. Failed to download agentbeat: \n" + e.stderr)
         exit(1)
 
 
 def unzip_agentbeat():
-    print("todo unzip")
+    print("todo unzip")

From dfc302ee4d896e5a73d42f41661f2e9492bc51ee Mon Sep 17 00:00:00 2001
From: Olga Naidjonoka
Date: Thu, 3 Oct 2024 13:00:13 +0300
Subject: [PATCH 05/18] added stderr

---
 .buildkite/scripts/agentbeat/prepare_env.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/.buildkite/scripts/agentbeat/prepare_env.py b/.buildkite/scripts/agentbeat/prepare_env.py
index c3dbfd0977dd..6a5c0fd487c9 100755
--- a/.buildkite/scripts/agentbeat/prepare_env.py
+++ b/.buildkite/scripts/agentbeat/prepare_env.py
@@ -18,6 +18,13 @@ def get_arch() -> str:
     return arch
 
 
+def get_artifact_extension(agent_os) -> str:
+    if agent_os == "windows":
+        return "zip"
+    else:
+        return "tar.gz"
+
+
 def download_agentbeat_artifact(agent_os, agent_arch):
     pattern = f"x-pack/agentbeat/build/distributions/agentbeat-*-{agent_os}-{agent_arch}.tar.gz"
 
@@ -28,9 +35,9 @@ def download_agentbeat_artifact(agent_os, agent_arch):
             ["buildkite-agent", "artifact", "download", pattern, ".",
              "--build", "01924d2b-b061-45ae-a106-e885584ff26f",
              "--step", "agentbeat-package-linux"],
-            check=True, stdout=sys.stdout, stderr=subprocess.PIPE, text=True)
-    except subprocess.CalledProcessError as e:
-        print("Error occurred. Failed to download agentbeat: \n" + e.stderr)
+            check=True, stdout=sys.stdout, stderr=sys.stderr, text=True)
+    except subprocess.CalledProcessError:
+        print("Failed to download agentbeat")
         exit(1)

From b65df1598971d69e47703ea8505aff975070a6fb Mon Sep 17 00:00:00 2001
From: Olga Naidjonoka
Date: Thu, 3 Oct 2024 13:21:00 +0300
Subject: [PATCH 06/18] test error output

---
 .buildkite/scripts/agentbeat/prepare_env.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/.buildkite/scripts/agentbeat/prepare_env.py b/.buildkite/scripts/agentbeat/prepare_env.py
index 6a5c0fd487c9..db42e64ee8fc 100755
--- a/.buildkite/scripts/agentbeat/prepare_env.py
+++ b/.buildkite/scripts/agentbeat/prepare_env.py
@@ -26,9 +26,9 @@ def get_artifact_extension(agent_os) -> str:
 
 
 def download_agentbeat_artifact(agent_os, agent_arch):
-    pattern = f"x-pack/agentbeat/build/distributions/agentbeat-*-{agent_os}-{agent_arch}.tar.gz"
-
-    print("--- Downloading agentbeat artifact")
+    print(" ")
+    extension = get_artifact_extension(agent_os)
+    pattern = f"x-pack/agentbeat/build/distributions/agentbeat-*-{agent_os}-{agent_arch}.{extension}"
 
     try:
         subprocess.run(
@@ -37,12 +37,17 @@ def download_agentbeat_artifact(agent_os, agent_arch):
             check=True, stdout=sys.stdout, stderr=sys.stderr, text=True)
     except subprocess.CalledProcessError:
-        print("Failed to download agentbeat")
         exit(1)
 
 
 def unzip_agentbeat():
     print("todo unzip")
+    try:
+        subprocess.run(
+            ["unzip"],
+            check=True, stdout=sys.stdout, stderr=sys.stderr, text=True)
+    except subprocess.CalledProcessError:
+        exit(1)

From e7e7a5571ffbe1aa9e0b2cdbf949f64abeb8ede7 Mon Sep 17 00:00:00 2001
From: Olga Naidjonoka
Date: Thu, 3 Oct 2024 13:51:23 +0300
Subject: [PATCH 07/18] added unzip
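
The new add_to_path() helper derives the extracted directory name from the
archive filename with a lookahead regex. A sketch of what the expression
matches (the filename here is an illustrative example, not taken from a
real build):

    import re

    pattern = r'(.*)(?=\.zip|.tar\.gz)'
    m = re.match(pattern, 'agentbeat-9.0.0-SNAPSHOT-linux-x86_64.tar.gz')
    if m:
        # group(1) is the archive name without its extension:
        # 'agentbeat-9.0.0-SNAPSHOT-linux-x86_64'
        print(m.group(1))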
---
 .buildkite/scripts/agentbeat/prepare_env.py    | 107 ++++++++++++----
 .../x-pack/pipeline.xpack.agentbeat.yml        |   4 +
 dev-tools/mage/parse_spec.go                   |  98 ++++++++++++++
 x-pack/agentbeat/magefile.go                   |   4 +
 4 files changed, 192 insertions(+), 21 deletions(-)
 create mode 100644 dev-tools/mage/parse_spec.go

diff --git a/.buildkite/scripts/agentbeat/prepare_env.py b/.buildkite/scripts/agentbeat/prepare_env.py
index db42e64ee8fc..4bda517516ba 100755
--- a/.buildkite/scripts/agentbeat/prepare_env.py
+++ b/.buildkite/scripts/agentbeat/prepare_env.py
@@ -3,7 +3,20 @@
 import platform
 import subprocess
 import sys
+import tarfile
+import os
+import re
 
+PATH = 'x-pack/agentbeat/build/distributions'
+
+
+def log(msg):
+    sys.stdout.write(f'{msg}\n')
+    sys.stdout.flush()
+
+
+def log_err(msg):
+    sys.stderr.write(f'{msg}\n')
+    sys.stderr.flush()
 
 def get_os() -> str:
     return platform.system().lower()
@@ -12,56 +25,108 @@ def get_os() -> str:
 def get_arch() -> str:
     arch = platform.machine().lower()
 
-    if arch == "amd64":
-        return "x86_64"
+    if arch == 'amd64':
+        return 'x86_64'
     else:
         return arch
 
 
 def get_artifact_extension(agent_os) -> str:
-    if agent_os == "windows":
-        return "zip"
+    if agent_os == 'windows':
+        return 'zip'
     else:
-        return "tar.gz"
+        return 'tar.gz'
 
 
-def download_agentbeat_artifact(agent_os, agent_arch):
-    print(" ")
+def get_artifact_pattern() -> str:
+    agent_os = get_os()
+    agent_arch = get_arch()
     extension = get_artifact_extension(agent_os)
-    pattern = f"x-pack/agentbeat/build/distributions/agentbeat-*-{agent_os}-{agent_arch}.{extension}"
+    return f'{PATH}/agentbeat-*-{agent_os}-{agent_arch}.{extension}'
 
+
+def download_agentbeat(pattern, path) -> str:
+    log('--- Downloading agentbeat')
     try:
         subprocess.run(
-            ["buildkite-agent", "artifact", "download", pattern, ".",
-             "--build", "01924d2b-b061-45ae-a106-e885584ff26f",
-             "--step", "agentbeat-package-linux"],
+            ['buildkite-agent', 'artifact', 'download', pattern, '.',
+             '--build', '01924d2b-b061-45ae-a106-e885584ff26f',
+             '--step', 'agentbeat-package-linux'],
             check=True, stdout=sys.stdout, stderr=sys.stderr, text=True)
     except subprocess.CalledProcessError:
         exit(1)
 
+    return get_filename(path)
+
+
+def get_filename(path) -> str:
+    try:
+        out = subprocess.run(
+            ['ls', '-p', path],
+            check=True, capture_output=True, text=True)
+        return out.stdout.strip()
+    except subprocess.CalledProcessError:
+        exit(1)
+
+
+def extract_agentbeat(filename):
+    log('~~~ Extracting agentbeat')
+    filepath = PATH + '/' + filename
+
+    if filepath.endswith('.zip'):
+        unzip_agentbeat(filepath)
+    else:
+        untar_agentbeat(filepath)
+    log('Successfully extracted agentbeat')
 
-def unzip_agentbeat():
-    print("todo unzip")
+
+def unzip_agentbeat(filepath):
     try:
         subprocess.run(
-            ["unzip"],
+            ['unzip', filepath],
             check=True, stdout=sys.stdout, stderr=sys.stderr, text=True)
-    except subprocess.CalledProcessError:
+    except subprocess.CalledProcessError as e:
+        log_err(e)
+        exit(1)
+
+
+def untar_agentbeat(filepath):
+    try:
+        with tarfile.open(filepath, 'r:gz') as tar:
+            tar.list()
+            tar.extractall()
+    except Exception as e:
+        log_err(e)
+        exit(1)
+
+
+def add_to_path(filepath):
+    pattern = r'(.*)(?=\.zip|.tar\.gz)'
+    match = re.match(pattern, filepath)
+    if match:
+        path = f'../build/distributions/{match.group(1)}/agentbeat'
+        log('--- AGENTBEAT_PATH: ' + path)
+        os.environ['AGENTBEAT_PATH'] = path
+    else:
+        log_err("No agentbeat executable found")
         exit(1)
 
 
 def install_synthetics():
-    print("--- Installing @elastic/synthetics")
+    log('--- Installing @elastic/synthetics')
 
     try:
         subprocess.run(
-            ["npm install -g @elastic/synthetics"],
+            ['npm', 'install', '-g', '@elastic/synthetics'],
             check=True
         )
     except subprocess.CalledProcessError:
-        print("Failed to install @elastic/synthetics")
+        log_err('Failed to install @elastic/synthetics')
         exit(1)
 
-print("--- OS Data: " + get_os() + " " + get_arch())
-download_agentbeat_artifact(get_os(), get_arch())
-# install_synthetics()
+
+artifact_pattern = get_artifact_pattern()
+archive = download_agentbeat(artifact_pattern, PATH)
+extract_agentbeat(archive)
+add_to_path(archive)
+install_synthetics()
diff --git a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
index cfed290ffba7..71607aefb250 100644
--- a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
+++ b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
@@ -12,6 +12,8 @@ env:
 
   IMAGE_BEATS_WITH_HOOKS_LATEST: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-beats-ci-with-hooks:latest"
 
+  AGENTBEAT_SPEC: "../agentbeat.spec.yml"
+
 steps:
 #  - group: "Check/Update"
 #    key: "x-pack-agentbeat-check-update"
@@ -103,6 +105,8 @@ steps:
 
       - label: ":linux: Agentbeat/Integration tests Linux"
         key: "agentbeat-it-linux"
+        env:
+          PLATFORM: "linux/amd64"
 #        depends_on:
 #          - agentbeat-package-linux
         command: |
diff --git a/dev-tools/mage/parse_spec.go b/dev-tools/mage/parse_spec.go
new file mode 100644
index 000000000000..5dc20199288d
--- /dev/null
+++ b/dev-tools/mage/parse_spec.go
@@ -0,0 +1,98 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package mage
+
+import (
+    "gopkg.in/yaml.v2"
+    "log"
+    "os"
+)
+
+type spec struct {
+    Inputs []input
+}
+
+type input struct {
+    Name        string
+    Description string
+    Platforms   []string
+    Command     command
+}
+
+type command struct {
+    Name string
+    Args []string
+}
+
+// ParseSpec parses agent.beat.spec.yml and generates test command
+func ParseSpec() {
+    specPath := os.Getenv("AGENTBEAT_SPEC")
+    if specPath == "" {
+        log.Fatal("AGENTBEAT_SPEC is not defined")
+    }
+
+    platform := os.Getenv("PLATFORM")
+    if platform == "" {
+        log.Fatal("PLATFORM is not defined")
+    }
+
+    spec, err := parseToObj()
+    if err != nil {
+        log.Fatalf("Error parsing agentbeat.spec.yml: %v", err)
+    }
+
+    inputList := filter(spec.Inputs, func(input input) bool {
+        return contains(input.Platforms, platform)
+    })
+
+    log.Print(inputList)
+}
+
+func parseToObj() (spec, error) {
+    specFile, err := os.ReadFile("../agentbeat.spec.yml")
+    if err != nil {
+        log.Fatalf("Error opening agentbeat.spec.yml: %v", err)
+        return spec{}, err
+    }
+    var spec spec
+    err = yaml.Unmarshal(specFile, &spec)
+    if err != nil {
+        log.Fatalf("Error parsing agentbeat.spec.yml: %v", err)
+        return spec, err
+    }
+    return spec, nil
+}
+
+func filter[T any](slice []T, condition func(T) bool) []T {
+    var result []T
+    for _, v := range slice {
+        if condition(v) {
+            result = append(result, v)
+        }
+    }
+    return result
+}
+
+func contains(slice []string, item string) bool {
+    for _, v := range slice {
+        if v == item {
+            return true
+        }
+    }
+    return false
+}
diff --git a/x-pack/agentbeat/magefile.go b/x-pack/agentbeat/magefile.go
index b65a3e59af36..097695552a7c 100644
--- a/x-pack/agentbeat/magefile.go
+++ b/x-pack/agentbeat/magefile.go
@@ -213,3 +213,7 @@ func PythonIntegTest(ctx context.Context) error {
     mg.Deps(BuildSystemTestBinary)
     return devtools.PythonIntegTestFromHost(devtools.DefaultPythonTestIntegrationFromHostArgs())
 }
+
+func TestWithSpecs(ctx context.Context) {
+    mg.Deps(devtools.ParseSpec)
+}

From b5421e243f0991ceed03cba6b6215e479463dd07 Mon Sep 17 00:00:00 2001
From: Olga Naidjonoka
Date: Tue, 8 Oct 2024 16:47:32 +0300
Subject: [PATCH 08/18] added TestWithSpec to magefile
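
TestWithSpec drives agentbeat once per unique input command for the current
platform; duplicate commands across inputs are collapsed by keying a map on
the joined argument string. A sketch of the same filter-and-dedup idea
(Python for brevity; field names follow the agentbeat.spec.yml layout used
by spec.go):

    import yaml

    def spec_commands(spec_path, platform):
        with open(spec_path) as f:
            spec = yaml.safe_load(f)
        commands = {
            ' '.join(i['command']['args'])
            for i in spec.get('inputs', [])
            if platform in i.get('platforms', [])
        }
        return sorted(commands)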
"./agentbeat.spec.yml" steps: # - group: "Check/Update" @@ -112,6 +112,8 @@ steps: command: | set -euo pipefail ./.buildkite/scripts/agentbeat/prepare_env.py + cd x-pack/agentbeat + mage -v testWithSpec agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -123,8 +125,10 @@ steps: key: "agentbeat-it-windows" # depends_on: # - agentbeat-package-linux - commands: - - ./.buildkite/scripts/agentbeat/prepare_env.py + command: | + ./.buildkite/scripts/agentbeat/prepare_env.py + cd x-pack/agentbeat + mage -v testWithSpec agents: provider: "gcp" image: "${IMAGE_WIN_2022}" diff --git a/dev-tools/mage/parse_spec.go b/dev-tools/mage/spec.go similarity index 75% rename from dev-tools/mage/parse_spec.go rename to dev-tools/mage/spec.go index 5dc20199288d..4afca22d5253 100644 --- a/dev-tools/mage/parse_spec.go +++ b/dev-tools/mage/spec.go @@ -21,6 +21,7 @@ import ( "gopkg.in/yaml.v2" "log" "os" + "strings" ) type spec struct { @@ -34,13 +35,17 @@ type input struct { Command command } +func (i *input) GetCommand() string { + return strings.Join(i.Command.Args, " ") +} + type command struct { Name string Args []string } -// ParseSpec parses agent.beat.spec.yml and generates test command -func ParseSpec() { +// SpecCommands parses agent.beat.spec.yml and collects commands for tests +func SpecCommands() []string { specPath := os.Getenv("AGENTBEAT_SPEC") if specPath == "" { log.Fatal("AGENTBEAT_SPEC is not defined") @@ -51,20 +56,26 @@ func ParseSpec() { log.Fatal("PLATFORM is not defined") } - spec, err := parseToObj() - if err != nil { - log.Fatalf("Error parsing agentbeat.spec.yml: %v", err) - } + spec, _ := parseToObj(specPath) - inputList := filter(spec.Inputs, func(input input) bool { + filteredInputs := filter(spec.Inputs, func(input input) bool { return contains(input.Platforms, platform) }) - log.Print(inputList) + commands := make(map[string]interface{}) + for _, i := range filteredInputs { + commands[i.GetCommand()] = nil + } + keys := make([]string, 0, len(commands)) + for k := range commands { + keys = append(keys, k) + } + + return keys } -func parseToObj() (spec, error) { - specFile, err := os.ReadFile("../agentbeat.spec.yml") +func parseToObj(path string) (spec, error) { + specFile, err := os.ReadFile(path) if err != nil { log.Fatalf("Error opening agentbeat.spec.yml: %v", err) return spec{}, err diff --git a/x-pack/agentbeat/magefile.go b/x-pack/agentbeat/magefile.go index 097695552a7c..c52e122efdb0 100644 --- a/x-pack/agentbeat/magefile.go +++ b/x-pack/agentbeat/magefile.go @@ -10,6 +10,7 @@ import ( "context" "fmt" "os" + "os/exec" "path/filepath" "time" @@ -142,7 +143,7 @@ func Package() error { return nil } -// TestPackages tests the generated packages (i.e. file modes, owners, groups). +// TestPackages tests the generated packages (i.agentbeatCmd. file modes, owners, groups). 
func TestPackages() error { return devtools.TestPackages() } @@ -214,6 +215,65 @@ func PythonIntegTest(ctx context.Context) error { return devtools.PythonIntegTestFromHost(devtools.DefaultPythonTestIntegrationFromHostArgs()) } -func TestWithSpecs(ctx context.Context) { - mg.Deps(devtools.ParseSpec) +// TestWithSpec executes unique commands from agentbeat.spec.yml and validates that app haven't exited with non-zero +func TestWithSpec(ctx context.Context) { + var commands = devtools.SpecCommands() + + agentbeatPath := os.Getenv("AGENTBEAT_PATH") + fmt.Printf("--- AGENTBEAT_PATH: %s", agentbeatPath) + + cmdResults := make(map[string]bool) + + for _, command := range commands { + cmdResults[command] = agentbeatCmd(agentbeatPath, command) + } + + hasFailures := false + for cmd, res := range cmdResults { + if res { + fmt.Printf("Command [%s] succeeded", cmd) + } else { + fmt.Printf("Command [%s] failed", cmd) + hasFailures = true + } + } + + if hasFailures { + fmt.Printf("Some inputs failed. Exiting with error") + os.Exit(1) + } +} + +func agentbeatCmd(agentbeatPath string, command string) bool { + cmd := exec.Command(agentbeatPath, command) + fmt.Printf("Running command: %v", cmd) + + if err := cmd.Start(); err != nil { + _ = fmt.Errorf("failed to start command: %v", err) + } + + defer func() { + if err := cmd.Process.Kill(); err != nil { + _ = fmt.Errorf("failed to kill process: %v", err) + } else { + _ = fmt.Errorf("command process killed") + } + }() + + done := make(chan error, 1) + go func() { + done <- cmd.Wait() + }() + timeout := 2 * time.Second + deadline := time.After(timeout) + + select { + case err := <-done: + _ = fmt.Errorf("command exited before %s: %v", timeout.String(), err) + return false + + case <-deadline: + _ = fmt.Errorf("%s", cmd.Stdout) + return true + } } From 1f4bb93f1d588643da2d38939d4b58fc7a2ec3d4 Mon Sep 17 00:00:00 2001 From: Olga Naidjonoka Date: Wed, 9 Oct 2024 11:00:12 +0300 Subject: [PATCH 09/18] updated scripts --- .buildkite/hooks/pre-command | 5 +++ .../{prepare_env.py => setup_agentbeat.py} | 30 +++------------- .../x-pack/pipeline.xpack.agentbeat.yml | 10 +++--- dev-tools/mage/spec.go | 12 +------ x-pack/agentbeat/magefile.go | 35 ++++++++++++------- 5 files changed, 38 insertions(+), 54 deletions(-) rename .buildkite/scripts/agentbeat/{prepare_env.py => setup_agentbeat.py} (76%) diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command index 5718d97879e1..8b84984a2823 100644 --- a/.buildkite/hooks/pre-command +++ b/.buildkite/hooks/pre-command @@ -35,6 +35,11 @@ if [[ "$BUILDKITE_PIPELINE_SLUG" == "auditbeat" || \ export BUILDKITE_ANALYTICS_TOKEN fi +if [[ "$BUILDKITE_PIPELINE_SLUG" == "beats-xpack-agentbeat" ]]; then + AGENTBEAT_PATH=$(.buildkite/scripts/agentbeat/setup_agentbeat.py) + export AGENTBEAT_PATH +fi + CPU_ARCH=$(uname -m) PLATFORM_TYPE=$(uname) diff --git a/.buildkite/scripts/agentbeat/prepare_env.py b/.buildkite/scripts/agentbeat/setup_agentbeat.py similarity index 76% rename from .buildkite/scripts/agentbeat/prepare_env.py rename to .buildkite/scripts/agentbeat/setup_agentbeat.py index d17c3cdd0e10..62e8c3bbd2cd 100755 --- a/.buildkite/scripts/agentbeat/prepare_env.py +++ b/.buildkite/scripts/agentbeat/setup_agentbeat.py @@ -4,7 +4,6 @@ import subprocess import sys import tarfile -import os import re PATH = 'x-pack/agentbeat/build/distributions' @@ -46,7 +45,6 @@ def get_artifact_pattern() -> str: def download_agentbeat(pattern, path) -> str: - log('--- Downloading agentbeat') try: subprocess.run( ['buildkite-agent', 
'artifact', 'download', pattern, '.', @@ -70,20 +68,18 @@ def get_filename(path) -> str: def extract_agentbeat(filename): - log('~~~ Extracting agentbeat') filepath = PATH + '/' + filename if filepath.endswith('.zip'): unzip_agentbeat(filepath) else: untar_agentbeat(filepath) - log('Successfully extracted agentbeat') def unzip_agentbeat(filepath): try: subprocess.run( - ['unzip', filepath], + ['unzip', '-qq', filepath], check=True, stdout=sys.stdout, stderr=sys.stderr, text=True) except subprocess.CalledProcessError as e: log_err(e) @@ -93,40 +89,24 @@ def unzip_agentbeat(filepath): def untar_agentbeat(filepath): try: with tarfile.open(filepath, 'r:gz') as tar: - tar.list() tar.extractall() except Exception as e: log_err(e) exit(1) -def add_to_path(filepath): +def get_path_to_executable(filepath) -> str: pattern = r'(.*)(?=\.zip|.tar\.gz)' match = re.match(pattern, filepath) if match: - path = f'../build/distributions/{match.group(1)}/agentbeat' - log("--- PATH: " + str(path)) - os.environ['AGENTBEAT_PATH'] = str(path) + path = f'../../{match.group(1)}/agentbeat' + return path else: log_err("No agentbeat executable found") exit(1) -def install_synthetics(): - log('--- Installing @elastic/synthetics') - - try: - subprocess.run( - ['npm', 'install', '-g', '@elastic/synthetics'], - check=True - ) - except subprocess.CalledProcessError: - log_err('Failed to install @elastic/synthetics') - exit(1) - - artifact_pattern = get_artifact_pattern() archive = download_agentbeat(artifact_pattern, PATH) extract_agentbeat(archive) -add_to_path(archive) -install_synthetics() +log(get_path_to_executable(archive)) diff --git a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml index b6225577b846..57618c671e40 100644 --- a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml +++ b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml @@ -110,9 +110,8 @@ steps: # depends_on: # - agentbeat-package-linux command: | - set -euo pipefail - ./.buildkite/scripts/agentbeat/prepare_env.py cd x-pack/agentbeat + ls -la ../../agentbeat-9.0.0-SNAPSHOT-linux-x86_64 mage -v testWithSpec agents: provider: "gcp" @@ -123,10 +122,11 @@ steps: - label: ":linux: Agentbeat/Integration tests Windows" key: "agentbeat-it-windows" + env: + PLATFORM: "windows/amd64" # depends_on: # - agentbeat-package-linux command: | - ./.buildkite/scripts/agentbeat/prepare_env.py cd x-pack/agentbeat mage -v testWithSpec agents: @@ -142,7 +142,7 @@ steps: # - agentbeat-package-linux # command: | # set -euo pipefail -# ./.buildkite/scripts/agentbeat/prepare_env.py +# ./.buildkite/scripts/agentbeat/setup_agentbeat.py # agents: # provider: "orka" # imagePrefix: "${IMAGE_MACOS_X86_64}" @@ -153,7 +153,7 @@ steps: # - agentbeat-package-linux # command: | # set -euo pipefail -# ./.buildkite/scripts/agentbeat/prepare_env.py +# ./.buildkite/scripts/agentbeat/setup_agentbeat.py # agents: # provider: "orka" # imagePrefix: "${IMAGE_MACOS_ARM}" diff --git a/dev-tools/mage/spec.go b/dev-tools/mage/spec.go index 4afca22d5253..af0527fd6196 100644 --- a/dev-tools/mage/spec.go +++ b/dev-tools/mage/spec.go @@ -45,17 +45,7 @@ type command struct { } // SpecCommands parses agent.beat.spec.yml and collects commands for tests -func SpecCommands() []string { - specPath := os.Getenv("AGENTBEAT_SPEC") - if specPath == "" { - log.Fatal("AGENTBEAT_SPEC is not defined") - } - - platform := os.Getenv("PLATFORM") - if platform == "" { - log.Fatal("PLATFORM is not defined") - } - +func SpecCommands(specPath string, platform string) []string { spec, _ 
:= parseToObj(specPath) filteredInputs := filter(spec.Inputs, func(input input) bool { diff --git a/x-pack/agentbeat/magefile.go b/x-pack/agentbeat/magefile.go index c52e122efdb0..0b29c6c5d747 100644 --- a/x-pack/agentbeat/magefile.go +++ b/x-pack/agentbeat/magefile.go @@ -9,6 +9,7 @@ package main import ( "context" "fmt" + "log" "os" "os/exec" "path/filepath" @@ -217,10 +218,19 @@ func PythonIntegTest(ctx context.Context) error { // TestWithSpec executes unique commands from agentbeat.spec.yml and validates that app haven't exited with non-zero func TestWithSpec(ctx context.Context) { - var commands = devtools.SpecCommands() + specPath := os.Getenv("AGENTBEAT_SPEC") + if specPath == "" { + log.Fatal("AGENTBEAT_SPEC is not defined\n") + } + + platform := os.Getenv("PLATFORM") + if platform == "" { + log.Fatal("PLATFORM is not defined\n") + } + + var commands = devtools.SpecCommands(specPath, platform) agentbeatPath := os.Getenv("AGENTBEAT_PATH") - fmt.Printf("--- AGENTBEAT_PATH: %s", agentbeatPath) cmdResults := make(map[string]bool) @@ -230,33 +240,32 @@ func TestWithSpec(ctx context.Context) { hasFailures := false for cmd, res := range cmdResults { - if res { - fmt.Printf("Command [%s] succeeded", cmd) - } else { - fmt.Printf("Command [%s] failed", cmd) + if !res { + fmt.Printf("~~~ Failed: [%s]\n", cmd) + fmt.Print(res) hasFailures = true } } if hasFailures { - fmt.Printf("Some inputs failed. Exiting with error") + fmt.Printf("Some inputs failed. Exiting with error\n") os.Exit(1) } } func agentbeatCmd(agentbeatPath string, command string) bool { cmd := exec.Command(agentbeatPath, command) - fmt.Printf("Running command: %v", cmd) + fmt.Printf("Running command: %v\n", cmd) if err := cmd.Start(); err != nil { - _ = fmt.Errorf("failed to start command: %v", err) + fmt.Printf("failed to start command: %v\n", err) } defer func() { if err := cmd.Process.Kill(); err != nil { - _ = fmt.Errorf("failed to kill process: %v", err) + fmt.Printf("failed to kill process: %v\n", err) } else { - _ = fmt.Errorf("command process killed") + fmt.Print("command process killed\n") } }() @@ -269,11 +278,11 @@ func agentbeatCmd(agentbeatPath string, command string) bool { select { case err := <-done: - _ = fmt.Errorf("command exited before %s: %v", timeout.String(), err) + fmt.Printf("command exited before %s: %v\n", timeout.String(), err) return false case <-deadline: - _ = fmt.Errorf("%s", cmd.Stdout) + fmt.Printf("%s\n", cmd.Stdout) return true } } From 771e938e0ce13f80b334a47fd47cd2df07543d63 Mon Sep 17 00:00:00 2001 From: Olga Naidjonoka Date: Thu, 10 Oct 2024 11:00:03 +0300 Subject: [PATCH 10/18] debug linux --- .buildkite/hooks/pre-command | 10 +++---- .../x-pack/pipeline.xpack.agentbeat.yml | 16 ++++++++---- x-pack/agentbeat/magefile.go | 26 ++++++++++++++----- 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command index 8b84984a2823..a6e5cd8750ef 100644 --- a/.buildkite/hooks/pre-command +++ b/.buildkite/hooks/pre-command @@ -18,6 +18,11 @@ if [[ "$BUILDKITE_PIPELINE_SLUG" == "beats-xpack-packetbeat" && "$BUILDKITE_STEP export PRIVATE_CI_GCS_CREDENTIALS_SECRET fi +if [[ "$BUILDKITE_PIPELINE_SLUG" == "beats-xpack-agentbeat" ]]; then + AGENTBEAT_PATH=$(.buildkite/scripts/agentbeat/setup_agentbeat.py) + export AGENTBEAT_PATH +fi + if [[ "$BUILDKITE_PIPELINE_SLUG" == "auditbeat" || \ "$BUILDKITE_PIPELINE_SLUG" == "beats-libbeat" || \ "$BUILDKITE_PIPELINE_SLUG" == "beats-metricbeat" || \ @@ -35,11 +40,6 @@ if [[ "$BUILDKITE_PIPELINE_SLUG" 
== "auditbeat" || \ export BUILDKITE_ANALYTICS_TOKEN fi -if [[ "$BUILDKITE_PIPELINE_SLUG" == "beats-xpack-agentbeat" ]]; then - AGENTBEAT_PATH=$(.buildkite/scripts/agentbeat/setup_agentbeat.py) - export AGENTBEAT_PATH -fi - CPU_ARCH=$(uname -m) PLATFORM_TYPE=$(uname) diff --git a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml index 57618c671e40..1b4f68992631 100644 --- a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml +++ b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml @@ -89,6 +89,12 @@ steps: # - x-pack/agentbeat/build/distributions/**/* # - "x-pack/agentbeat/build/*.xml" # - "x-pack/agentbeat/build/*.json" +# plugins: +# - test-collector#v1.10.2: +# files: "x-pack/agentbeat/build/TEST-*.xml" +# format: "junit" +# branches: "main" +# debug: true # retry: # automatic: # - limit: 1 @@ -103,7 +109,7 @@ steps: # - github_commit_status: # context: "agentbeat: Integration tests" - - label: ":linux: Agentbeat/Integration tests Linux" + - label: ":linux: x-pack/agentbeat: Ubuntu x86_64 Spec tests" key: "agentbeat-it-linux" env: PLATFORM: "linux/amd64" @@ -111,8 +117,8 @@ steps: # - agentbeat-package-linux command: | cd x-pack/agentbeat - ls -la ../../agentbeat-9.0.0-SNAPSHOT-linux-x86_64 - mage -v testWithSpec + ../../agentbeat-9.0.0-SNAPSHOT-linux-x86_64/agentbeat filebeat -E setup.ilm.enabled=false -E setup.template.enabled=false -E management.enabled=true -E management.restart_on_output_change=true -E logging.level=info -E logging.to_stderr=true -E gc_percent=${FILEBEAT_GOGC:100} -E filebeat.config.modules.enabled=false -E logging.event_data.to_stderr=true -E logging.event_data.to_files=false + # mage -v testWithSpec agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -120,14 +126,14 @@ steps: disk_size: 100 disk_type: "pd-ssd" - - label: ":linux: Agentbeat/Integration tests Windows" + - label: ":windows: x-pack/agentbeat: Windows x86_64 Spec tests" key: "agentbeat-it-windows" env: PLATFORM: "windows/amd64" # depends_on: # - agentbeat-package-linux command: | - cd x-pack/agentbeat + Set-Location -Path x-pack/agentbeat mage -v testWithSpec agents: provider: "gcp" diff --git a/x-pack/agentbeat/magefile.go b/x-pack/agentbeat/magefile.go index 0b29c6c5d747..429c4a2049fd 100644 --- a/x-pack/agentbeat/magefile.go +++ b/x-pack/agentbeat/magefile.go @@ -7,6 +7,7 @@ package main import ( + "bufio" "context" "fmt" "log" @@ -144,7 +145,7 @@ func Package() error { return nil } -// TestPackages tests the generated packages (i.agentbeatCmd. file modes, owners, groups). +// TestPackages tests the generated packages (i.runCmd. file modes, owners, groups). 
func TestPackages() error { return devtools.TestPackages() } @@ -235,14 +236,15 @@ func TestWithSpec(ctx context.Context) { cmdResults := make(map[string]bool) for _, command := range commands { - cmdResults[command] = agentbeatCmd(agentbeatPath, command) + cmdResults[command] = runCmd(agentbeatPath, command) } hasFailures := false for cmd, res := range cmdResults { - if !res { - fmt.Printf("~~~ Failed: [%s]\n", cmd) - fmt.Print(res) + if res { + fmt.Printf("--- :large_green_circle: Succeeded: [%s.10s...]\n", cmd) + } else { + fmt.Printf("--- :bangbang: Failed: [%s.10s...]\n", cmd) hasFailures = true } } @@ -253,9 +255,14 @@ func TestWithSpec(ctx context.Context) { } } -func agentbeatCmd(agentbeatPath string, command string) bool { +func runCmd(agentbeatPath string, command string) bool { cmd := exec.Command(agentbeatPath, command) - fmt.Printf("Running command: %v\n", cmd) + fmt.Printf("Executing: %s\n", cmd.String()) + + stdout, err := cmd.StdoutPipe() + if err != nil { + fmt.Printf("Error creating stdout pipe: %v\n", err) + } if err := cmd.Start(); err != nil { fmt.Printf("failed to start command: %v\n", err) @@ -279,6 +286,11 @@ func agentbeatCmd(agentbeatPath string, command string) bool { select { case err := <-done: fmt.Printf("command exited before %s: %v\n", timeout.String(), err) + fmt.Println("printing command stdout") + scanner := bufio.NewScanner(stdout) + for scanner.Scan() { + fmt.Println(scanner.Text()) + } return false case <-deadline: From dfe45dc5f62006ac3333b36349f2750ff66c026c Mon Sep 17 00:00:00 2001 From: Olga Naidjonoka Date: Mon, 14 Oct 2024 11:00:48 +0300 Subject: [PATCH 11/18] updated std's outputs --- .../x-pack/pipeline.xpack.agentbeat.yml | 3 +-- x-pack/agentbeat/magefile.go | 20 +++++++------------ 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml index 1b4f68992631..95c091fe0530 100644 --- a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml +++ b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml @@ -117,8 +117,7 @@ steps: # - agentbeat-package-linux command: | cd x-pack/agentbeat - ../../agentbeat-9.0.0-SNAPSHOT-linux-x86_64/agentbeat filebeat -E setup.ilm.enabled=false -E setup.template.enabled=false -E management.enabled=true -E management.restart_on_output_change=true -E logging.level=info -E logging.to_stderr=true -E gc_percent=${FILEBEAT_GOGC:100} -E filebeat.config.modules.enabled=false -E logging.event_data.to_stderr=true -E logging.event_data.to_files=false - # mage -v testWithSpec + mage -v testWithSpec agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" diff --git a/x-pack/agentbeat/magefile.go b/x-pack/agentbeat/magefile.go index 429c4a2049fd..d02b5f9c83a8 100644 --- a/x-pack/agentbeat/magefile.go +++ b/x-pack/agentbeat/magefile.go @@ -7,13 +7,13 @@ package main import ( - "bufio" "context" "fmt" "log" "os" "os/exec" "path/filepath" + "strings" "time" "github.com/magefile/mage/sh" @@ -236,7 +236,7 @@ func TestWithSpec(ctx context.Context) { cmdResults := make(map[string]bool) for _, command := range commands { - cmdResults[command] = runCmd(agentbeatPath, command) + cmdResults[command] = runCmd(agentbeatPath, strings.Split(command, " ")) } hasFailures := false @@ -255,14 +255,13 @@ func TestWithSpec(ctx context.Context) { } } -func runCmd(agentbeatPath string, command string) bool { - cmd := exec.Command(agentbeatPath, command) +func runCmd(agentbeatPath string, command []string) bool { + cmd := exec.Command(agentbeatPath, command...) 
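
The runCmd watchdog treats "still running when the 2s deadline fires" as
success and "exited before the deadline" as failure, since these spec
commands start long-running inputs. A sketch of the same pattern (Python,
illustrative only; the argv is a placeholder):

    import subprocess

    def runs_past_deadline(argv, timeout=2.0) -> bool:
        proc = subprocess.Popen(argv)
        try:
            proc.wait(timeout=timeout)
            return False   # exited early: the input failed to start
        except subprocess.TimeoutExpired:
            proc.kill()    # still alive at the deadline: healthy
            return True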
---
 .../x-pack/pipeline.xpack.agentbeat.yml        |  3 +--
 x-pack/agentbeat/magefile.go                   | 20 +++++++-------------
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
index 1b4f68992631..95c091fe0530 100644
--- a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
+++ b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
@@ -117,8 +117,7 @@ steps:
 #          - agentbeat-package-linux
         command: |
           cd x-pack/agentbeat
-          ../../agentbeat-9.0.0-SNAPSHOT-linux-x86_64/agentbeat filebeat -E setup.ilm.enabled=false -E setup.template.enabled=false -E management.enabled=true -E management.restart_on_output_change=true -E logging.level=info -E logging.to_stderr=true -E gc_percent=${FILEBEAT_GOGC:100} -E filebeat.config.modules.enabled=false -E logging.event_data.to_stderr=true -E logging.event_data.to_files=false
-          # mage -v testWithSpec
+          mage -v testWithSpec
         agents:
           provider: "gcp"
diff --git a/x-pack/agentbeat/magefile.go b/x-pack/agentbeat/magefile.go
index 429c4a2049fd..d02b5f9c83a8 100644
--- a/x-pack/agentbeat/magefile.go
+++ b/x-pack/agentbeat/magefile.go
@@ -7,13 +7,13 @@
 package main
 
 import (
-    "bufio"
     "context"
     "fmt"
     "log"
     "os"
     "os/exec"
     "path/filepath"
+    "strings"
     "time"
 
     "github.com/magefile/mage/sh"
@@ -236,7 +236,7 @@ func TestWithSpec(ctx context.Context) {
     cmdResults := make(map[string]bool)
 
     for _, command := range commands {
-        cmdResults[command] = runCmd(agentbeatPath, command)
+        cmdResults[command] = runCmd(agentbeatPath, strings.Split(command, " "))
     }
 
     hasFailures := false
@@ -255,14 +255,13 @@ func TestWithSpec(ctx context.Context) {
     }
 }
 
-func runCmd(agentbeatPath string, command string) bool {
-    cmd := exec.Command(agentbeatPath, command)
+func runCmd(agentbeatPath string, command []string) bool {
+    cmd := exec.Command(agentbeatPath, command...)
     fmt.Printf("Executing: %s\n", cmd.String())
 
-    stdout, err := cmd.StdoutPipe()
-    if err != nil {
-        fmt.Printf("Error creating stdout pipe: %v\n", err)
-    }
+    cmd.Stdout = os.Stdout
+    cmd.Stderr = os.Stderr
+    cmd.Stdin = os.Stdin
 
     if err := cmd.Start(); err != nil {
         fmt.Printf("failed to start command: %v\n", err)
@@ -285,11 +284,6 @@ func runCmd(agentbeatPath string, command []string) bool {
     select {
     case err := <-done:
         fmt.Printf("command exited before %s: %v\n", timeout.String(), err)
-        fmt.Println("printing command stdout")
-        scanner := bufio.NewScanner(stdout)
-        for scanner.Scan() {
-            fmt.Println(scanner.Text())
-        }
         return false
 
     case <-deadline:

From 63ea4d7837e47aa8bcf92ff0d12cec09dfa9f5df Mon Sep 17 00:00:00 2001
From: Olga Naidjonoka
Date: Mon, 14 Oct 2024 11:46:05 +0300
Subject: [PATCH 12/18] updated scripts && enabled macos step

---
 .buildkite/hooks/pre-command                   |   5 +-
 .../scripts/agentbeat/setup_agentbeat.py       |   2 +-
 .../x-pack/pipeline.xpack.agentbeat.yml        | 110 +++++++++---------
 3 files changed, 60 insertions(+), 57 deletions(-)

diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command
index a6e5cd8750ef..a6755d9df3c1 100644
--- a/.buildkite/hooks/pre-command
+++ b/.buildkite/hooks/pre-command
@@ -18,7 +18,10 @@ if [[ "$BUILDKITE_PIPELINE_SLUG" == "beats-xpack-packetbeat" && "$BUILDKITE_STEP
   export PRIVATE_CI_GCS_CREDENTIALS_SECRET
 fi
 
-if [[ "$BUILDKITE_PIPELINE_SLUG" == "beats-xpack-agentbeat" ]]; then
+if [[ "$BUILDKITE_PIPELINE_SLUG" == "beats-xpack-agentbeat" && "$BUILDKITE_STEP_KEY" == *"agentbeat-it"* ]]; then
+  if [[ "$BUILDKITE_STEP_KEY" == *"macos"* ]]; then
+    ulimit -Sn 10000
+  fi
   AGENTBEAT_PATH=$(.buildkite/scripts/agentbeat/setup_agentbeat.py)
   export AGENTBEAT_PATH
 fi
diff --git a/.buildkite/scripts/agentbeat/setup_agentbeat.py b/.buildkite/scripts/agentbeat/setup_agentbeat.py
index 62e8c3bbd2cd..0c82c16021f5 100755
--- a/.buildkite/scripts/agentbeat/setup_agentbeat.py
+++ b/.buildkite/scripts/agentbeat/setup_agentbeat.py
@@ -48,7 +48,7 @@ def download_agentbeat(pattern, path) -> str:
     try:
         subprocess.run(
             ['buildkite-agent', 'artifact', 'download', pattern, '.',
-             '--build', '01924d2b-b061-45ae-a106-e885584ff26f',
+             # '--build', '01928f55-8452-41c6-89ba-fe21f019f53c',
              '--step', 'agentbeat-package-linux'],
             check=True, stdout=sys.stdout, stderr=sys.stderr, text=True)
     except subprocess.CalledProcessError:
diff --git a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
index 95c091fe0530..4cf23fdffd0c 100644
--- a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
+++ b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
@@ -15,19 +15,19 @@ env:
   AGENTBEAT_SPEC: "./agentbeat.spec.yml"
 
 steps:
-#  - group: "Check/Update"
-#    key: "x-pack-agentbeat-check-update"
-#
-#    steps:
-#      - label: "agentbeat: Run pre-commit"
-#        command: "pre-commit run --all-files"
-#        agents:
-#          image: "${IMAGE_BEATS_WITH_HOOKS_LATEST}"
-#          memory: "2Gi"
-#        useCustomGlobalHooks: true
-#        notify:
-#          - github_commit_status:
-#              context: "agentbeat: pre-commit"
+  - group: "Check/Update"
+    key: "x-pack-agentbeat-check-update"
+
+    steps:
+      - label: "agentbeat: Run pre-commit"
+        command: "pre-commit run --all-files"
+        agents:
+          image: "${IMAGE_BEATS_WITH_HOOKS_LATEST}"
+          memory: "2Gi"
+        useCustomGlobalHooks: true
+        notify:
+          - github_commit_status:
+              context: "agentbeat: pre-commit"
 
@@ -29,43 +29,43 @@ steps:
-#  - wait: ~
-#    # with PRs, we want to run mandatory tests only if check/update step succeeds
-#    # for other cases, e.g. merge commits, we want to run mandatory tests (and publish) independently of other tests
-#    # this allows building DRA artifacts even if there is flakiness in check/update step
-#    if: build.env("BUILDKITE_PULL_REQUEST") != "false"
-#    depends_on: "x-pack-agentbeat-check-update"
+  - wait: ~
+    # with PRs, we want to run mandatory tests only if check/update step succeeds
+    # for other cases, e.g. merge commits, we want to run mandatory tests (and publish) independently of other tests
+    # this allows building DRA artifacts even if there is flakiness in check/update step
+    if: build.env("BUILDKITE_PULL_REQUEST") != "false"
+    depends_on: "x-pack-agentbeat-check-update"
 
   - group: "Agentbeat tests"
     key: "agentbeat-mandatory-tests"
 
     steps:
-#      - label: ":linux: Agentbeat packaging Linux"
-#        key: "agentbeat-package-linux"
-#        env:
-#          PLATFORMS: "+all linux/amd64 linux/arm64 windows/amd64 darwin/amd64 darwin/arm64"
-#          SNAPSHOT: true
-#        command: |
-#          set -euo pipefail
-#          cd x-pack/agentbeat
-#          mage package
-#        artifact_paths:
-#          - x-pack/agentbeat/build/distributions/**/*
-#          - "x-pack/agentbeat/build/*.xml"
-#          - "x-pack/agentbeat/build/*.json"
-#        retry:
-#          automatic:
-#            - limit: 2
-#        timeout_in_minutes: 60
-#        agents:
-#          provider: "gcp"
-#          image: "${IMAGE_UBUNTU_X86_64}"
-#          machineType: "${GCP_HI_PERF_MACHINE_TYPE}"
-#          disk_size: 100
-#          disk_type: "pd-ssd"
-#        notify:
-#          - github_commit_status:
-#              context: "agentbeat: Packaging"
+      - label: ":linux: Agentbeat packaging Linux"
+        key: "agentbeat-package-linux"
+        env:
+          PLATFORMS: "+all linux/amd64 linux/arm64 windows/amd64 darwin/amd64 darwin/arm64"
+          SNAPSHOT: true
+        command: |
+          set -euo pipefail
+          cd x-pack/agentbeat
+          mage package
+        artifact_paths:
+          - x-pack/agentbeat/build/distributions/**/*
+          - "x-pack/agentbeat/build/*.xml"
+          - "x-pack/agentbeat/build/*.json"
+        retry:
+          automatic:
+            - limit: 2
+        timeout_in_minutes: 60
+        agents:
+          provider: "gcp"
+          image: "${IMAGE_UBUNTU_X86_64}"
+          machineType: "${GCP_HI_PERF_MACHINE_TYPE}"
+          disk_size: 100
+          disk_type: "pd-ssd"
+        notify:
+          - github_commit_status:
+              context: "agentbeat: Packaging"
@@ -113,8 +113,8 @@ steps:
         key: "agentbeat-it-linux"
         env:
           PLATFORM: "linux/amd64"
-#        depends_on:
-#          - agentbeat-package-linux
+        depends_on:
+          - agentbeat-package-linux
         command: |
           cd x-pack/agentbeat
           mage -v testWithSpec
@@ -129,8 +129,8 @@ steps:
         key: "agentbeat-it-windows"
         env:
           PLATFORM: "windows/amd64"
-#        depends_on:
-#          - agentbeat-package-linux
+        depends_on:
+          - agentbeat-package-linux
         command: |
           Set-Location -Path x-pack/agentbeat
           mage -v testWithSpec
@@ -140,18 +140,18 @@ steps:
           machine_type: "${GCP_WIN_MACHINE_TYPE}"
           disk_size: 200
           disk_type: "pd-ssd"
 
-#      - label: ":linux: Agentbeat/Integration tests macOS"
-#        key: "agentbeat-it-macos"
-#        depends_on:
-#          - agentbeat-package-linux
-#        command: |
-#          set -euo pipefail
-#          ./.buildkite/scripts/agentbeat/setup_agentbeat.py
-#        agents:
-#          provider: "orka"
-#          imagePrefix: "${IMAGE_MACOS_X86_64}"
-#
-#      - label: ":linux: Agentbeat/Integration tests macOS arm64"
-#        key: "agentbeat-it-macos-arm"
-#        depends_on:
-#          - agentbeat-package-linux
-#        command: |
-#          set -euo pipefail
-#          ./.buildkite/scripts/agentbeat/setup_agentbeat.py
-#        agents:
-#          provider: "orka"
-#          imagePrefix: "${IMAGE_MACOS_ARM}"
+      - label: ":macos: x-pack/agentbeat: macOS x86_64 Spec tests"
+        key: "agentbeat-it-macos"
+        depends_on:
+          - agentbeat-package-linux
+        command: |
+          cd x-pack/agentbeat
+          mage -v testWithSpec
+        agents:
+          provider: "orka"
+          imagePrefix: "${IMAGE_MACOS_X86_64}"
+
+      - label: ":linux: Agentbeat/Integration tests macOS arm64"
+        key: "agentbeat-it-macos-arm"
+        depends_on:
+          - agentbeat-package-linux
+        command: |
+          cd x-pack/agentbeat
+          mage -v testWithSpec
+        agents:
+          provider: "orka"
+          imagePrefix: "${IMAGE_MACOS_ARM}"

From f8ff6314a596f01e40ee32d2c7c3e52b97ad3c4e Mon Sep 17 00:00:00 2001
From: Olga Naidjonoka
Date: Mon, 14 Oct 2024 15:26:15 +0300
Subject: [PATCH 13/18] debug macos

---
 .../scripts/agentbeat/setup_agentbeat.py       |   5 +-
 .../x-pack/pipeline.xpack.agentbeat.yml        | 146 +++++++++---------
 2 files changed, 77 insertions(+), 74 deletions(-)

diff --git a/.buildkite/scripts/agentbeat/setup_agentbeat.py b/.buildkite/scripts/agentbeat/setup_agentbeat.py
index 0c82c16021f5..818e048b999d 100755
--- a/.buildkite/scripts/agentbeat/setup_agentbeat.py
+++ b/.buildkite/scripts/agentbeat/setup_agentbeat.py
@@ -48,7 +48,7 @@ def download_agentbeat(pattern, path) -> str:
     try:
         subprocess.run(
             ['buildkite-agent', 'artifact', 'download', pattern, '.',
-             # '--build', '01928f55-8452-41c6-89ba-fe21f019f53c',
+             '--build', '01924d2b-b061-45ae-a106-e885584ff26f',
             '--step', 'agentbeat-package-linux'],
             check=True, stdout=sys.stdout, stderr=sys.stderr, text=True)
     except subprocess.CalledProcessError:
@@ -58,6 +58,7 @@ def download_agentbeat(pattern, path) -> str:
 
 def get_filename(path) -> str:
+    print("--- Getting filename")
     try:
         out = subprocess.run(
             ['ls', '-p', path],
@@ -106,5 +107,7 @@ def get_path_to_executable(filepath) -> str:
 
 artifact_pattern = get_artifact_pattern()
 archive = download_agentbeat(artifact_pattern, PATH)
+print("--- Extracting")
 extract_agentbeat(archive)
+print("--- Getting path to exec")
 log(get_path_to_executable(archive))
diff --git a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
index 4cf23fdffd0c..8873ffe29b8e 100644
--- a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
+++ b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
@@ -15,19 +15,19 @@ env:
   AGENTBEAT_SPEC: "./agentbeat.spec.yml"
 
 steps:
-  - group: "Check/Update"
-    key: "x-pack-agentbeat-check-update"
-
-    steps:
-      - label: "agentbeat: Run pre-commit"
-        command: "pre-commit run --all-files"
-        agents:
-          image: "${IMAGE_BEATS_WITH_HOOKS_LATEST}"
-          memory: "2Gi"
-        useCustomGlobalHooks: true
-        notify:
-          - github_commit_status:
-              context: "agentbeat: pre-commit"
+#  - group: "Check/Update"
+#    key: "x-pack-agentbeat-check-update"
+#
+#    steps:
+#      - label: "agentbeat: Run pre-commit"
+#        command: "pre-commit run --all-files"
+#        agents:
+#          image: "${IMAGE_BEATS_WITH_HOOKS_LATEST}"
+#          memory: "2Gi"
+#        useCustomGlobalHooks: true
+#        notify:
+#          - github_commit_status:
+#              context: "agentbeat: pre-commit"
 
@@ -29,43 +29,43 @@ steps:
-  - wait: ~
-    # with PRs, we want to run mandatory tests only if check/update step succeeds
-    # for other cases, e.g. merge commits, we want to run mandatory tests (and publish) independently of other tests
-    # this allows building DRA artifacts even if there is flakiness in check/update step
-    if: build.env("BUILDKITE_PULL_REQUEST") != "false"
-    depends_on: "x-pack-agentbeat-check-update"
+#  - wait: ~
+#    # with PRs, we want to run mandatory tests only if check/update step succeeds
+#    # for other cases, e.g. merge commits, we want to run mandatory tests (and publish) independently of other tests
+#    # this allows building DRA artifacts even if there is flakiness in check/update step
+#    if: build.env("BUILDKITE_PULL_REQUEST") != "false"
+#    depends_on: "x-pack-agentbeat-check-update"
 
   - group: "Agentbeat tests"
     key: "agentbeat-mandatory-tests"
 
     steps:
-      - label: ":linux: Agentbeat packaging Linux"
-        key: "agentbeat-package-linux"
-        env:
-          PLATFORMS: "+all linux/amd64 linux/arm64 windows/amd64 darwin/amd64 darwin/arm64"
-          SNAPSHOT: true
-        command: |
-          set -euo pipefail
-          cd x-pack/agentbeat
-          mage package
-        artifact_paths:
-          - x-pack/agentbeat/build/distributions/**/*
-          - "x-pack/agentbeat/build/*.xml"
-          - "x-pack/agentbeat/build/*.json"
-        retry:
-          automatic:
-            - limit: 2
-        timeout_in_minutes: 60
-        agents:
-          provider: "gcp"
-          image: "${IMAGE_UBUNTU_X86_64}"
-          machineType: "${GCP_HI_PERF_MACHINE_TYPE}"
-          disk_size: 100
-          disk_type: "pd-ssd"
-        notify:
-          - github_commit_status:
-              context: "agentbeat: Packaging"
+#      - label: ":linux: Agentbeat packaging Linux"
+#        key: "agentbeat-package-linux"
+#        env:
+#          PLATFORMS: "+all linux/amd64 linux/arm64 windows/amd64 darwin/amd64 darwin/arm64"
+#          SNAPSHOT: true
+#        command: |
+#          set -euo pipefail
+#          cd x-pack/agentbeat
+#          mage package
+#        artifact_paths:
+#          - x-pack/agentbeat/build/distributions/**/*
+#          - "x-pack/agentbeat/build/*.xml"
+#          - "x-pack/agentbeat/build/*.json"
+#        retry:
+#          automatic:
+#            - limit: 2
+#        timeout_in_minutes: 60
+#        agents:
+#          provider: "gcp"
+#          image: "${IMAGE_UBUNTU_X86_64}"
+#          machineType: "${GCP_HI_PERF_MACHINE_TYPE}"
+#          disk_size: 100
+#          disk_type: "pd-ssd"
+#        notify:
+#          - github_commit_status:
+#              context: "agentbeat: Packaging"
@@ -109,42 +109,42 @@ steps:
-      - label: ":linux: x-pack/agentbeat: Ubuntu x86_64 Spec tests"
-        key: "agentbeat-it-linux"
-        env:
-          PLATFORM: "linux/amd64"
-        depends_on:
-          - agentbeat-package-linux
-        command: |
-          cd x-pack/agentbeat
-          mage -v testWithSpec
-        agents:
-          provider: "gcp"
-          image: "${IMAGE_UBUNTU_X86_64}"
-          machineType: "${GCP_HI_PERF_MACHINE_TYPE}"
-          disk_size: 100
-          disk_type: "pd-ssd"
-
-      - label: ":windows: x-pack/agentbeat: Windows x86_64 Spec tests"
-        key: "agentbeat-it-windows"
-        env:
-          PLATFORM: "windows/amd64"
-        depends_on:
-          - agentbeat-package-linux
-        command: |
-          Set-Location -Path x-pack/agentbeat
-          mage -v testWithSpec
-        agents:
-          provider: "gcp"
-          image: "${IMAGE_WIN_2022}"
-          machine_type: "${GCP_WIN_MACHINE_TYPE}"
-          disk_size: 200
-          disk_type: "pd-ssd"
+#      - label: ":linux: x-pack/agentbeat: Ubuntu x86_64 Spec tests"
+#        key: "agentbeat-it-linux"
+#        env:
+#          PLATFORM: "linux/amd64"
+#        depends_on:
+#          - agentbeat-package-linux
+#        command: |
+#          cd x-pack/agentbeat
+#          mage -v testWithSpec
+#        agents:
+#          provider: "gcp"
+#          image: "${IMAGE_UBUNTU_X86_64}"
+#          machineType: "${GCP_HI_PERF_MACHINE_TYPE}"
+#          disk_size: 100
+#          disk_type: "pd-ssd"
+#
+#      - label: ":windows: x-pack/agentbeat: Windows x86_64 Spec tests"
+#        key: "agentbeat-it-windows"
+#        env:
+#          PLATFORM: "windows/amd64"
+#        depends_on:
+#          - agentbeat-package-linux
+#        command: |
+#          Set-Location -Path x-pack/agentbeat
+#          mage -v testWithSpec
+#        agents:
+#          provider: "gcp"
+#          image: "${IMAGE_WIN_2022}"
+#          machine_type: "${GCP_WIN_MACHINE_TYPE}"
+#          disk_size: 200
+#          disk_type: "pd-ssd"
 
       - label: ":macos: x-pack/agentbeat: macOS x86_64 Spec tests"
         key: "agentbeat-it-macos"
-        depends_on:
-          - agentbeat-package-linux
+#        depends_on:
+#          - agentbeat-package-linux
         command: |
           cd x-pack/agentbeat
           mage -v testWithSpec
         agents:
           provider: "orka"
           imagePrefix: "${IMAGE_MACOS_X86_64}"
 
       - label: ":linux: Agentbeat/Integration tests macOS arm64"
         key: "agentbeat-it-macos-arm"
-        depends_on:
-          - agentbeat-package-linux
+#        depends_on:
+#          - agentbeat-package-linux
         command: |
           cd x-pack/agentbeat
           mage -v testWithSpec
         agents:
           provider: "orka"
           imagePrefix: "${IMAGE_MACOS_ARM}"

From 0017b1731dd4ebd2088b4f491ab185c027d2d474 Mon Sep 17 00:00:00 2001
From: Olga Naidjonoka
Date: Mon, 14 Oct 2024 15:32:30 +0300
Subject: [PATCH 14/18] debug macos

---
 .buildkite/hooks/pre-command                   |   6 +-
 .../scripts/agentbeat/setup_agentbeat.py       |  18 +-
 .../x-pack/pipeline.xpack.agentbeat.yml        | 178 +++++++++---------
 dev-tools/mage/spec.go                         |   1 +
 4 files changed, 102 insertions(+), 101 deletions(-)

diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command
index a6755d9df3c1..f1f9bd540294 100644
--- a/.buildkite/hooks/pre-command
+++ b/.buildkite/hooks/pre-command
@@ -19,10 +19,8 @@ if [[ "$BUILDKITE_PIPELINE_SLUG" == "beats-xpack-packetbeat" && "$BUILDKITE_STEP
 fi
 
 if [[ "$BUILDKITE_PIPELINE_SLUG" == "beats-xpack-agentbeat" && "$BUILDKITE_STEP_KEY" == *"agentbeat-it"* ]]; then
-  if [[ "$BUILDKITE_STEP_KEY" == *"macos"* ]]; then
-    ulimit -Sn 10000
-  fi
-  AGENTBEAT_PATH=$(.buildkite/scripts/agentbeat/setup_agentbeat.py)
+#  .buildkite/scripts/agentbeat/setup_agentbeat.py
+  AGENTBEAT_PATH=$(.buildkite/scripts/agentbeat/setup_agentbeat.py | tail -n 1)
   export AGENTBEAT_PATH
 fi
 
diff --git a/.buildkite/scripts/agentbeat/setup_agentbeat.py b/.buildkite/scripts/agentbeat/setup_agentbeat.py
index 818e048b999d..b5f148a18ccd 100755
--- a/.buildkite/scripts/agentbeat/setup_agentbeat.py
+++ b/.buildkite/scripts/agentbeat/setup_agentbeat.py
@@ -1,10 +1,9 @@
 #!/usr/bin/env python3
-
 import platform
+import re
 import subprocess
 import sys
 import tarfile
-import re
 
 PATH = 'x-pack/agentbeat/build/distributions'
@@ -48,10 +47,10 @@ def download_agentbeat(pattern, path) -> str:
     try:
         subprocess.run(
             ['buildkite-agent', 'artifact', 'download', pattern, '.',
-             '--build', '01924d2b-b061-45ae-a106-e885584ff26f',
+             # '--build', '01928f55-8452-41c6-89ba-fe21f019f53c',
             '--step', 'agentbeat-package-linux'],
             check=True, stdout=sys.stdout, stderr=sys.stderr, text=True)
+
     except subprocess.CalledProcessError:
         exit(1)
@@ -58,11 +58,11 @@ def download_agentbeat(pattern, path) -> str:
 
 def get_filename(path) -> str:
-    print("--- Getting filename")
     try:
         out = subprocess.run(
             ['ls', '-p', path],
             check=True, capture_output=True, text=True)
+        print("--- ls -p: " + out.stdout)
         return out.stdout.strip()
     except subprocess.CalledProcessError:
         exit(1)
@@ -89,9 +89,10 @@ def unzip_agentbeat(filepath):
 
 def untar_agentbeat(filepath):
     try:
-        with tarfile.open(filepath, 'r:gz') as tar:
-            tar.extractall()
-    except Exception as e:
+        subprocess.run(
+            ['tar', '-xvf', filepath],
+            check=True, stdout=sys.stdout, stderr=sys.stderr, text=True)
+    except subprocess.CalledProcessError as e:
         log_err(e)
         exit(1)
@@ -107,9 +108,7 @@ def get_path_to_executable(filepath) -> str:
         log_err("No agentbeat executable found")
         exit(1)
 
-
 artifact_pattern = get_artifact_pattern()
 archive = download_agentbeat(artifact_pattern, PATH)
-print("--- Extracting")
 extract_agentbeat(archive)
-print("--- Getting path to exec")
 log(get_path_to_executable(archive))
diff --git a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
index 8873ffe29b8e..da66c7186589 100644
--- a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
+++ b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml
@@ -15,57 +15,57 @@ env:
   AGENTBEAT_SPEC: "./agentbeat.spec.yml"
 
 steps:
-#  - group: "Check/Update"
-#    key: "x-pack-agentbeat-check-update"
-#
-#    steps:
-#      - label: "agentbeat: Run pre-commit"
-#        command: "pre-commit run --all-files"
-#        agents:
-#          image: "${IMAGE_BEATS_WITH_HOOKS_LATEST}"
-#          memory: "2Gi"
-#        useCustomGlobalHooks: true
-#        notify:
-#          - github_commit_status:
-#              context: "agentbeat: pre-commit"
-#
-#  - wait: ~
-#    # with PRs, we want to run mandatory tests only if check/update step succeeds
-#    # for other cases, e.g. merge commits, we want to run mandatory tests (and publish) independently of other tests
-#    # this allows building DRA artifacts even if there is flakiness in check/update step
-#    if: build.env("BUILDKITE_PULL_REQUEST") != "false"
-#    depends_on: "x-pack-agentbeat-check-update"
merge commits, we want to run mundatory test (and publish) independently of other tests +# # this allows building DRA artifacts even if there is flakiness in check/update step +# if: build.env("BUILDKITE_PULL_REQUEST") != "false" +# depends_on: "x-pack-agentbeat-check-update" - group: "Agentbeat tests" key: "agentbeat-mandatory-tests" steps: -# - label: ":linux: Agentbeat packaging Linux" -# key: "agentbeat-package-linux" -# env: -# PLATFORMS: "+all linux/amd64 linux/arm64 windows/amd64 darwin/amd64 darwin/arm64" -# SNAPSHOT: true -# command: | -# set -euo pipefail -# cd x-pack/agentbeat -# mage package -# artifact_paths: -# - x-pack/agentbeat/build/distributions/**/* -# - "x-pack/agentbeat/build/*.xml" -# - "x-pack/agentbeat/build/*.json" -# retry: -# automatic: -# - limit: 2 -# timeout_in_minutes: 60 -# agents: -# provider: "gcp" -# image: "${IMAGE_UBUNTU_X86_64}" -# machineType: "${GCP_HI_PERF_MACHINE_TYPE}" -# disk_size: 100 -# disk_type: "pd-ssd" -# notify: -# - github_commit_status: -# context: "agentbeat: Packaging" + - label: ":linux: Agentbeat packaging Linux" + key: "agentbeat-package-linux" + env: + PLATFORMS: "+all linux/amd64 linux/arm64 windows/amd64 darwin/amd64 darwin/arm64" + SNAPSHOT: true + command: | + set -euo pipefail + cd x-pack/agentbeat + mage package + artifact_paths: + - x-pack/agentbeat/build/distributions/**/* + - "x-pack/agentbeat/build/*.xml" + - "x-pack/agentbeat/build/*.json" + retry: + automatic: + - limit: 2 + timeout_in_minutes: 60 + agents: + provider: "gcp" + image: "${IMAGE_UBUNTU_X86_64}" + machineType: "${GCP_HI_PERF_MACHINE_TYPE}" + disk_size: 100 + disk_type: "pd-ssd" + notify: + - github_commit_status: + context: "agentbeat: Packaging" # - label: ":linux: Agentbeat/Integration tests Linux" # key: "agentbeat-it-linux" @@ -109,56 +109,60 @@ steps: # - github_commit_status: # context: "agentbeat: Integration tests" -# - label: ":linux: x-pack/agentbeat: Ubuntu x86_64 Spec tests" -# key: "agentbeat-it-linux" -# env: -# PLATFORM: "linux/amd64" -# depends_on: -# - agentbeat-package-linux -# command: | -# cd x-pack/agentbeat -# mage -v testWithSpec -# agents: -# provider: "gcp" -# image: "${IMAGE_UBUNTU_X86_64}" -# machineType: "${GCP_HI_PERF_MACHINE_TYPE}" -# disk_size: 100 -# disk_type: "pd-ssd" -# -# - label: ":windows: x-pack/agentbeat: Windows x86_64 Spec tests" -# key: "agentbeat-it-windows" -# env: -# PLATFORM: "windows/amd64" -# depends_on: -# - agentbeat-package-linux -# command: | -# Set-Location -Path x-pack/agentbeat -# mage -v testWithSpec -# agents: -# provider: "gcp" -# image: "${IMAGE_WIN_2022}" -# machine_type: "${GCP_WIN_MACHINE_TYPE}" -# disk_size: 200 -# disk_type: "pd-ssd" + - label: ":linux: x-pack/agentbeat: Ubuntu x86_64 Spec tests" + key: "agentbeat-it-linux" + env: + PLATFORM: "linux/amd64" + depends_on: + - agentbeat-package-linux + command: | + cd x-pack/agentbeat + mage -v testWithSpec + agents: + provider: "gcp" + image: "${IMAGE_UBUNTU_X86_64}" + machineType: "${GCP_HI_PERF_MACHINE_TYPE}" + disk_size: 100 + disk_type: "pd-ssd" + + - label: ":windows: x-pack/agentbeat: Windows x86_64 Spec tests" + key: "agentbeat-it-windows" + env: + PLATFORM: "windows/amd64" + depends_on: + - agentbeat-package-linux + command: | + Set-Location -Path x-pack/agentbeat + mage -v testWithSpec + agents: + provider: "gcp" + image: "${IMAGE_WIN_2022}" + machine_type: "${GCP_WIN_MACHINE_TYPE}" + disk_size: 200 + disk_type: "pd-ssd" - label: ":macos: x-pack/agentbeat: macOS x86_64 Spec tests" key: "agentbeat-it-macos" -# depends_on: -# - 
agentbeat-package-linux + env: + PLATFORM: "darwin/amd64" + depends_on: + - agentbeat-package-linux command: | + set -euo pipefail + source .buildkite/scripts/install_macos_tools.sh cd x-pack/agentbeat mage -v testWithSpec agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_X86_64}" - - label: ":linux: Agentbeat/Integration tests macOS arm64" - key: "agentbeat-it-macos-arm" +# - label: ":linux: Agentbeat/Integration tests macOS arm64" +# key: "agentbeat-it-macos-arm" # depends_on: # - agentbeat-package-linux - command: | - cd x-pack/agentbeat - mage -v testWithSpec - agents: - provider: "orka" - imagePrefix: "${IMAGE_MACOS_ARM}" +# command: | +# cd x-pack/agentbeat +# mage -v testWithSpec +# agents: +# provider: "orka" +# imagePrefix: "${IMAGE_MACOS_ARM}" diff --git a/dev-tools/mage/spec.go b/dev-tools/mage/spec.go index af0527fd6196..03c733f1dd6d 100644 --- a/dev-tools/mage/spec.go +++ b/dev-tools/mage/spec.go @@ -19,6 +19,7 @@ package mage import ( "gopkg.in/yaml.v2" + "log" "os" "strings" From 74b0aca0b9c247ca0cae6d42e5d58a82755ada0a Mon Sep 17 00:00:00 2001 From: Olga Naidjonoka Date: Thu, 17 Oct 2024 12:33:04 +0300 Subject: [PATCH 15/18] added macos arm step --- .buildkite/hooks/pre-command | 1 - .../scripts/agentbeat/setup_agentbeat.py | 9 +- .../x-pack/pipeline.xpack.agentbeat.yml | 93 +++++++------------ 3 files changed, 43 insertions(+), 60 deletions(-) diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command index f1f9bd540294..cb5bb51eb803 100644 --- a/.buildkite/hooks/pre-command +++ b/.buildkite/hooks/pre-command @@ -19,7 +19,6 @@ if [[ "$BUILDKITE_PIPELINE_SLUG" == "beats-xpack-packetbeat" && "$BUILDKITE_STEP fi if [[ "$BUILDKITE_PIPELINE_SLUG" == "beats-xpack-agentbeat" && "$BUILDKITE_STEP_KEY" == *"agentbeat-it"* ]]; then -# .buildkite/scripts/agentbeat/setup_agentbeat.py AGENTBEAT_PATH=$(.buildkite/scripts/agentbeat/setup_agentbeat.py | tail -n 1) export AGENTBEAT_PATH fi diff --git a/.buildkite/scripts/agentbeat/setup_agentbeat.py b/.buildkite/scripts/agentbeat/setup_agentbeat.py index b5f148a18ccd..35139ec22787 100755 --- a/.buildkite/scripts/agentbeat/setup_agentbeat.py +++ b/.buildkite/scripts/agentbeat/setup_agentbeat.py @@ -12,10 +12,12 @@ def log(msg): sys.stdout.write(f'{msg}\n') sys.stdout.flush() + def log_err(msg): sys.stderr.write(f'{msg}\n') sys.stderr.flush() + def get_os() -> str: return platform.system().lower() @@ -26,7 +28,10 @@ def get_arch() -> str: if arch == 'amd64': return 'x86_64' else: - return arch + if get_os() == 'darwin': + return 'aarch64' + else: + return arch def get_artifact_extension(agent_os) -> str: @@ -40,6 +45,7 @@ def get_artifact_pattern() -> str: agent_os = get_os() agent_arch = get_arch() extension = get_artifact_extension(agent_os) + print('Artifact params: ' + agent_os + ' ' + agent_arch + ' ' + extension) return f'{PATH}/agentbeat-*-{agent_os}-{agent_arch}.{extension}' @@ -47,7 +53,6 @@ def download_agentbeat(pattern, path) -> str: try: subprocess.run( ['buildkite-agent', 'artifact', 'download', pattern, '.', - # '--build', '01928f55-8452-41c6-89ba-fe21f019f53c', '--step', 'agentbeat-package-linux'], check=True, stdout=sys.stdout, stderr=sys.stderr, text=True) diff --git a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml index da66c7186589..a5375bd1e3d3 100644 --- a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml +++ b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml @@ -5,6 +5,9 @@ env: GCP_HI_PERF_MACHINE_TYPE: "c2d-highcpu-16" IMAGE_UBUNTU_X86_64: 
"family/platform-ingest-beats-ubuntu-2204" + AWS_ARM_INSTANCE_TYPE: "m6g.xlarge" + AWS_IMAGE_UBUNTU_ARM_64: "platform-ingest-beats-ubuntu-2204-aarch64" + IMAGE_MACOS_ARM: "generic-13-ventura-arm" IMAGE_MACOS_X86_64: "generic-13-ventura-x64" @@ -67,50 +70,8 @@ steps: - github_commit_status: context: "agentbeat: Packaging" - # - label: ":linux: Agentbeat/Integration tests Linux" - # key: "agentbeat-it-linux" - # depends_on: - # - agentbeat-package-linux - # env: - # ASDF_NODEJS_VERSION: 18.17.1 - # PLATFORMS: "+all linux/amd64 linux/arm64 windows/amd64 darwin/amd64 darwin/arm64" - # SNAPSHOT: true - # command: | - # set -euo pipefail - # echo "~~~ Downloading artifacts" - # buildkite-agent artifact download x-pack/agentbeat/build/distributions/** . --step 'agentbeat-package-linux' - # ls -lah x-pack/agentbeat/build/distributions/ - # echo "~~~ Installing @elastic/synthetics with npm" - # npm install -g @elastic/synthetics - # echo "~~~ Running tests" - # cd x-pack/agentbeat - # mage goIntegTest - # artifact_paths: - # - x-pack/agentbeat/build/distributions/**/* - # - "x-pack/agentbeat/build/*.xml" - # - "x-pack/agentbeat/build/*.json" -# plugins: -# - test-collector#v1.10.2: -# files: "x-pack/agentbeat/build/TEST-*.xml" -# format: "junit" -# branches: "main" -# debug: true - # retry: - # automatic: - # - limit: 1 - # timeout_in_minutes: 60 - # agents: - # provider: "gcp" - # image: "${IMAGE_UBUNTU_X86_64}" - # machineType: "${GCP_HI_PERF_MACHINE_TYPE}" - # disk_size: 100 - # disk_type: "pd-ssd" - # notify: - # - github_commit_status: - # context: "agentbeat: Integration tests" - - - label: ":linux: x-pack/agentbeat: Ubuntu x86_64 Spec tests" - key: "agentbeat-it-linux" + - label: ":ubuntu: x-pack/agentbeat: Ubuntu x86_64 Spec tests" + key: "agentbeat-it-linux-x86-64" env: PLATFORM: "linux/amd64" depends_on: @@ -125,6 +86,20 @@ steps: disk_size: 100 disk_type: "pd-ssd" + - label: ":ubuntu: x-pack/agentbeat: Ubuntu arm64 Spec tests" + key: "agentbeat-it-linux-arm64" + env: + PLATFORM: "linux/arm64" + depends_on: + - agentbeat-package-linux + command: | + cd x-pack/agentbeat + mage -v testWithSpec + agents: + provider: "aws" + imagePrefix: "${AWS_IMAGE_UBUNTU_ARM_64}" + instanceType: "${AWS_ARM_INSTANCE_TYPE}" + - label: ":windows: x-pack/agentbeat: Windows x86_64 Spec tests" key: "agentbeat-it-windows" env: @@ -142,11 +117,11 @@ steps: disk_type: "pd-ssd" - label: ":macos: x-pack/agentbeat: macOS x86_64 Spec tests" - key: "agentbeat-it-macos" - env: - PLATFORM: "darwin/amd64" + key: "agentbeat-it-macos-x86-64" depends_on: - agentbeat-package-linux + env: + PLATFORM: "darwin/amd64" command: | set -euo pipefail source .buildkite/scripts/install_macos_tools.sh @@ -156,13 +131,17 @@ steps: provider: "orka" imagePrefix: "${IMAGE_MACOS_X86_64}" -# - label: ":linux: Agentbeat/Integration tests macOS arm64" -# key: "agentbeat-it-macos-arm" -# depends_on: -# - agentbeat-package-linux -# command: | -# cd x-pack/agentbeat -# mage -v testWithSpec -# agents: -# provider: "orka" -# imagePrefix: "${IMAGE_MACOS_ARM}" + - label: ":macos: Agentbeat/Integration tests macOS arm64" + key: "agentbeat-it-macos-arm64" + depends_on: + - agentbeat-package-linux + env: + PLATFORM: "darwin/arm64" + command: | + set -euo pipefail + source .buildkite/scripts/install_macos_tools.sh + cd x-pack/agentbeat + mage -v testWithSpec + agents: + provider: "orka" + imagePrefix: "${IMAGE_MACOS_ARM}" From cc26ea0e6f761b8029213e4b7c2543a3978fbc0d Mon Sep 17 00:00:00 2001 From: Olga Naidjonoka Date: Thu, 17 Oct 2024 16:08:45 +0300 
Subject: [PATCH 16/18] refactor --- .buildkite/hooks/pre-command | 4 +- .../scripts/agentbeat/setup_agentbeat.py | 69 +++++++++++-------- .../x-pack/pipeline.xpack.agentbeat.yml | 57 +++++++++------ 3 files changed, 79 insertions(+), 51 deletions(-) diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command index cb5bb51eb803..5cb0722edd84 100644 --- a/.buildkite/hooks/pre-command +++ b/.buildkite/hooks/pre-command @@ -19,7 +19,9 @@ if [[ "$BUILDKITE_PIPELINE_SLUG" == "beats-xpack-packetbeat" && "$BUILDKITE_STEP fi if [[ "$BUILDKITE_PIPELINE_SLUG" == "beats-xpack-agentbeat" && "$BUILDKITE_STEP_KEY" == *"agentbeat-it"* ]]; then - AGENTBEAT_PATH=$(.buildkite/scripts/agentbeat/setup_agentbeat.py | tail -n 1) + out=$(.buildkite/scripts/agentbeat/setup_agentbeat.py) + echo "$out" + AGENTBEAT_PATH=$(echo "$out" | tail -n 1) export AGENTBEAT_PATH fi diff --git a/.buildkite/scripts/agentbeat/setup_agentbeat.py b/.buildkite/scripts/agentbeat/setup_agentbeat.py index 35139ec22787..02634649e787 100755 --- a/.buildkite/scripts/agentbeat/setup_agentbeat.py +++ b/.buildkite/scripts/agentbeat/setup_agentbeat.py @@ -6,6 +6,26 @@ import tarfile PATH = 'x-pack/agentbeat/build/distributions' +PLATFORMS = { + 'windows': { + 'amd64': 'x86_64', + }, + 'linux': { + 'x86_64': 'x86_64', + 'aarch64': 'arm64', + }, + 'darwin': { + 'x86_64': 'x86_64', + 'arm64': 'aarch64', + } + } + + +class Archive: + def __init__(self, os, arch, ext): + self.os = os + self.arch = arch + self.ext = ext def log(msg): @@ -18,38 +38,29 @@ def log_err(msg): sys.stderr.flush() -def get_os() -> str: - return platform.system().lower() +def get_archive_params() -> Archive: + system = platform.system().lower() + machine = platform.machine().lower() + arch = PLATFORMS.get(system, {}).get(machine) + ext = get_artifact_extension(system) + return Archive(system, arch, ext) -def get_arch() -> str: - arch = platform.machine().lower() - if arch == 'amd64': - return 'x86_64' - else: - if get_os() == 'darwin': - return 'aarch64' - else: - return arch - - -def get_artifact_extension(agent_os) -> str: - if agent_os == 'windows': +def get_artifact_extension(system) -> str: + if system == 'windows': return 'zip' else: return 'tar.gz' -def get_artifact_pattern() -> str: - agent_os = get_os() - agent_arch = get_arch() - extension = get_artifact_extension(agent_os) - print('Artifact params: ' + agent_os + ' ' + agent_arch + ' ' + extension) - return f'{PATH}/agentbeat-*-{agent_os}-{agent_arch}.{extension}' +def get_artifact_pattern(archive_obj) -> str: + return f'{PATH}/agentbeat-*-{archive_obj.os}-{archive_obj.arch}.{archive_obj.ext}' -def download_agentbeat(pattern, path) -> str: +def download_agentbeat(archive_obj) -> str: + pattern = get_artifact_pattern(archive_obj) + log('--- Downloading Agentbeat artifact by pattern: ' + pattern) try: subprocess.run( ['buildkite-agent', 'artifact', 'download', pattern, '.', @@ -59,15 +70,14 @@ def download_agentbeat(pattern, path) -> str: except subprocess.CalledProcessError: exit(1) - return get_filename(path) + return get_full_filename() -def get_filename(path) -> str: +def get_full_filename() -> str: try: out = subprocess.run( - ['ls', '-p', path], + ['ls', '-p', PATH], check=True, capture_output=True, text=True) - print("--- ls -p: " + out.stdout) return out.stdout.strip() except subprocess.CalledProcessError: exit(1) @@ -75,6 +85,7 @@ def get_filename(path) -> str: def extract_agentbeat(filename): filepath = PATH + '/' + filename + log('Extracting Agentbeat artifact: ' + filepath) if 
filepath.endswith('.zip'): unzip_agentbeat(filepath) @@ -109,10 +120,10 @@ def get_path_to_executable(filepath) -> str: path = f'../../{match.group(1)}/agentbeat' return path else: - log_err("No agentbeat executable found") + log_err('No agentbeat executable found') exit(1) -artifact_pattern = get_artifact_pattern() -archive = download_agentbeat(artifact_pattern, PATH) +archive_params = get_archive_params() +archive = download_agentbeat(archive_params) extract_agentbeat(archive) log(get_path_to_executable(archive)) diff --git a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml index a5375bd1e3d3..1687aa25d922 100644 --- a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml +++ b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml @@ -18,26 +18,26 @@ env: AGENTBEAT_SPEC: "./agentbeat.spec.yml" steps: -# - group: "Check/Update" -# key: "x-pack-agentbeat-check-update" -# -# steps: -# - label: "agentbeat: Run pre-commit" -# command: "pre-commit run --all-files" -# agents: -# image: "${IMAGE_BEATS_WITH_HOOKS_LATEST}" -# memory: "2Gi" -# useCustomGlobalHooks: true -# notify: -# - github_commit_status: -# context: "agentbeat: pre-commit" -# -# - wait: ~ -# # with PRs, we want to run mandatory tests only if check/update step succeed -# # for other cases, e.g. merge commits, we want to run mundatory test (and publish) independently of other tests -# # this allows building DRA artifacts even if there is flakiness in check/update step -# if: build.env("BUILDKITE_PULL_REQUEST") != "false" -# depends_on: "x-pack-agentbeat-check-update" + - group: "Check/Update" + key: "x-pack-agentbeat-check-update" + + steps: + - label: "agentbeat: Run pre-commit" + command: "pre-commit run --all-files" + agents: + image: "${IMAGE_BEATS_WITH_HOOKS_LATEST}" + memory: "2Gi" + useCustomGlobalHooks: true + notify: + - github_commit_status: + context: "agentbeat: pre-commit" + + - wait: ~ + # with PRs, we want to run mandatory tests only if check/update step succeed + # for other cases, e.g. 
merge commits, we want to run mandatory tests (and publish) independently of other tests
+    # this allows building DRA artifacts even if there is flakiness in check/update step
+    if: build.env("BUILDKITE_PULL_REQUEST") != "false"
+    depends_on: "x-pack-agentbeat-check-update"

   - group: "Agentbeat tests"
     key: "agentbeat-mandatory-tests"
@@ -85,6 +85,9 @@ steps:
       machineType: "${GCP_HI_PERF_MACHINE_TYPE}"
       disk_size: 100
       disk_type: "pd-ssd"
+      notify:
+        - github_commit_status:
+            context: "agentbeat: Ubuntu x86_64 Spec tests"

   - label: ":ubuntu: x-pack/agentbeat: Ubuntu arm64 Spec tests"
     key: "agentbeat-it-linux-arm64"
@@ -99,6 +102,9 @@ steps:
       provider: "aws"
       imagePrefix: "${AWS_IMAGE_UBUNTU_ARM_64}"
       instanceType: "${AWS_ARM_INSTANCE_TYPE}"
+      notify:
+        - github_commit_status:
+            context: "agentbeat: Ubuntu arm64 Spec tests"

   - label: ":windows: x-pack/agentbeat: Windows x86_64 Spec tests"
     key: "agentbeat-it-windows"
@@ -115,6 +121,9 @@ steps:
       machine_type: "${GCP_WIN_MACHINE_TYPE}"
       disk_size: 200
       disk_type: "pd-ssd"
+      notify:
+        - github_commit_status:
+            context: "agentbeat: Windows x86_64 Spec tests"

   - label: ":macos: x-pack/agentbeat: macOS x86_64 Spec tests"
     key: "agentbeat-it-macos-x86-64"
@@ -130,8 +139,11 @@ steps:
     agents:
       provider: "orka"
       imagePrefix: "${IMAGE_MACOS_X86_64}"
+    notify:
+      - github_commit_status:
+          context: "agentbeat: macOS x86_64 Spec tests"

-  - label: ":macos: Agentbeat/Integration tests macOS arm64"
+  - label: ":macos: x-pack/agentbeat: macOS arm64 Spec tests"
     key: "agentbeat-it-macos-arm64"
     depends_on:
       - agentbeat-package-linux
@@ -145,3 +157,6 @@ steps:
     agents:
       provider: "orka"
       imagePrefix: "${IMAGE_MACOS_ARM}"
+    notify:
+      - github_commit_status:
+          context: "agentbeat: macOS arm64 Spec tests"
From 8f433a3fddc6ad7d6d813c5a9eefc93e75806704 Mon Sep 17 00:00:00 2001
From: Olga Naidjonoka
Date: Fri, 18 Oct 2024 14:58:40 +0300
Subject: [PATCH 17/18] restored testPackages annotation

---
 x-pack/agentbeat/magefile.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/x-pack/agentbeat/magefile.go b/x-pack/agentbeat/magefile.go
index d02b5f9c83a8..bd72a558ba39 100644
--- a/x-pack/agentbeat/magefile.go
+++ b/x-pack/agentbeat/magefile.go
@@ -145,7 +145,7 @@ func Package() error {
 	return nil
 }
 
-// TestPackages tests the generated packages (i.runCmd. file modes, owners, groups).
+// TestPackages tests the generated packages (i.e. file modes, owners, groups).
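+// It delegates to devtools.TestPackages, so the actual package assertions live
+// in the shared dev-tools helpers.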
func TestPackages() error { return devtools.TestPackages() } From c5e9e1a5f7b82eb5a97b59203d6867774c2de417 Mon Sep 17 00:00:00 2001 From: Olga Naidjonoka Date: Mon, 21 Oct 2024 12:05:33 +0300 Subject: [PATCH 18/18] added servrless tests step --- .buildkite/hooks/pre-command | 4 +- .../x-pack/pipeline.xpack.agentbeat.yml | 205 ++-- dev-tools/mage/agentbeat-serverless.go | 56 + dev-tools/mage/gotest.go | 27 + dev-tools/mage/spec.go | 100 -- .../srvrlesstest/component/platforms.go | 175 ++++ .../target/srvrlesstest/core/process/cmd.go | 49 + .../srvrlesstest/core/process/cmd_darwin.go | 67 ++ .../srvrlesstest/core/process/cmd_linux.go | 70 ++ .../core/process/external_unix.go | 42 + .../core/process/external_windows.go | 65 ++ .../srvrlesstest/core/process/job_unix.go | 52 + .../srvrlesstest/core/process/job_windows.go | 100 ++ .../srvrlesstest/core/process/process.go | 197 ++++ .../mage/target/srvrlesstest/define/batch.go | 320 ++++++ .../srvrlesstest/define/requirements.go | 179 ++++ .../mage/target/srvrlesstest/srvrlesstest.go | 428 ++++++++ .../srvrlesstest/testing/common/batch.go | 32 + .../srvrlesstest/testing/common/build.go | 32 + .../srvrlesstest/testing/common/config.go | 147 +++ .../srvrlesstest/testing/common/instance.go | 79 ++ .../srvrlesstest/testing/common/logger.go | 24 + .../testing/common/prefix_output.go | 74 ++ .../srvrlesstest/testing/common/runner.go | 57 + .../srvrlesstest/testing/common/stack.go | 89 ++ .../srvrlesstest/testing/common/supported.go | 28 + .../srvrlesstest/testing/define/batch.go | 320 ++++++ .../testing/define/requirements.go | 180 ++++ .../target/srvrlesstest/testing/ess/client.go | 79 ++ .../target/srvrlesstest/testing/ess/config.go | 86 ++ .../create_deployment_csp_configuration.yaml | 15 + .../ess/create_deployment_request.tmpl.json | 102 ++ .../srvrlesstest/testing/ess/deployment.go | 401 +++++++ .../srvrlesstest/testing/ess/serverless.go | 331 ++++++ .../testing/ess/serverless_provisioner.go | 275 +++++ .../testing/ess/statful_provisioner.go | 201 ++++ .../target/srvrlesstest/testing/fetcher.go | 256 +++++ .../srvrlesstest/testing/kubernetes/image.go | 258 +++++ .../testing/kubernetes/kind/provisioner.go | 298 ++++++ .../srvrlesstest/testing/kubernetes/runner.go | 135 +++ .../testing/kubernetes/supported.go | 117 +++ .../srvrlesstest/testing/linux/debian.go | 219 ++++ .../srvrlesstest/testing/linux/linux.go | 169 +++ .../target/srvrlesstest/testing/linux/rhel.go | 126 +++ .../mage/target/srvrlesstest/testing/log.go | 157 +++ .../testing/multipas/provisioner.go | 330 ++++++ .../target/srvrlesstest/testing/ogc/api.go | 60 ++ .../target/srvrlesstest/testing/ogc/config.go | 100 ++ .../srvrlesstest/testing/ogc/provisioner.go | 354 +++++++ .../srvrlesstest/testing/ogc/supported.go | 202 ++++ .../srvrlesstest/testing/runner/archiver.go | 125 +++ .../srvrlesstest/testing/runner/json.go | 60 ++ .../srvrlesstest/testing/runner/junit.go | 99 ++ .../srvrlesstest/testing/runner/runner.go | 968 +++++++++++++++++ .../srvrlesstest/testing/runner/utils.go | 54 + .../target/srvrlesstest/testing/ssh/client.go | 301 ++++++ .../target/srvrlesstest/testing/ssh/file.go | 32 + .../srvrlesstest/testing/ssh/interface.go | 62 ++ .../target/srvrlesstest/testing/ssh/keys.go | 60 ++ .../srvrlesstest/testing/supported/batch.go | 195 ++++ .../testing/supported/supported.go | 287 +++++ .../srvrlesstest/testing/windows/windows.go | 342 ++++++ .../target/srvrlesstest/utils/root_unix.go | 33 + .../target/srvrlesstest/utils/root_windows.go | 59 ++ 
.../srvrlesstest/utils/root_windows_test.go | 33 + go.mod | 25 +- go.sum | 63 +- x-pack/agentbeat/magefile.go | 82 +- .../agentbeat/testing/integration/README.md | 1 + .../agent_long_running_leak_test.go | 406 ++++++++ .../integration/agent_long_test_apache.json | 251 +++++ .../agent_long_test_base_system_integ.json | 788 ++++++++++++++ .../integration/apm_propagation_test.go | 240 +++++ .../integration/beats_serverless_test.go | 631 +++++++++++ .../testing/integration/container_cmd_test.go | 491 +++++++++ .../testing/integration/delay_enroll_test.go | 145 +++ .../testing/integration/diagnostics_test.go | 379 +++++++ .../endpoint_security_package.json.tmpl | 316 ++++++ .../integration/endpoint_security_test.go | 896 ++++++++++++++++ .../integration/endpoint_test_tools.go | 133 +++ .../testing/integration/event_logging_test.go | 404 ++++++++ .../testing/integration/fake_test.go | 179 ++++ x-pack/agentbeat/testing/integration/fakes.go | 85 ++ .../testing/integration/fleet-server.json | 38 + .../testing/integration/fleetserver_test.go | 168 +++ .../testing/integration/fqdn_test.go | 334 ++++++ .../testing/integration/groups_test.go | 43 + .../testing/integration/inspect_test.go | 91 ++ .../testing/integration/install_test.go | 420 ++++++++ .../kubernetes_agent_service_test.go | 129 +++ .../kubernetes_agent_standalone_test.go | 977 ++++++++++++++++++ .../testing/integration/linux_deb_test.go | 194 ++++ .../testing/integration/linux_rpm_test.go | 194 ++++ .../testing/integration/log_level_test.go | 392 +++++++ .../integration/logs_ingestion_test.go | 552 ++++++++++ .../integration/metrics_monitoring_test.go | 146 +++ .../integration/monitoring_endpoint_test.go | 266 +++++ ...monitoring_probe_preserve_text_cfg_test.go | 206 ++++ .../monitoring_probe_reload_test.go | 183 ++++ .../testing/integration/otel_test.go | 617 +++++++++++ .../integration/package_version_test.go | 375 +++++++ .../integration/pkgversion_common_test.go | 129 +++ .../testing/integration/proxy_url_test.go | 793 ++++++++++++++ .../integration/switch_privileged_test.go | 139 +++ .../integration/switch_unprivileged_test.go | 139 +++ .../integration/system_integration_setup.json | 789 ++++++++++++++ .../testdata/.upgrade-test-agent-versions.yml | 12 + .../integration/testdata/connectors.agent.yml | 13 + .../upgrade_broken_package_test.go | 97 ++ .../testing/integration/upgrade_fleet_test.go | 661 ++++++++++++ .../testing/integration/upgrade_gpg_test.go | 168 +++ .../integration/upgrade_rollback_test.go | 291 ++++++ .../upgrade_standalone_inprogress_test.go | 102 ++ .../upgrade_standalone_retry_test.go | 99 ++ .../upgrade_standalone_same_commit_test.go | 373 +++++++ .../integration/upgrade_standalone_test.go | 78 ++ .../integration/upgrade_uninstall_test.go | 100 ++ 117 files changed, 24682 insertions(+), 320 deletions(-) create mode 100644 dev-tools/mage/agentbeat-serverless.go delete mode 100644 dev-tools/mage/spec.go create mode 100644 dev-tools/mage/target/srvrlesstest/component/platforms.go create mode 100644 dev-tools/mage/target/srvrlesstest/core/process/cmd.go create mode 100644 dev-tools/mage/target/srvrlesstest/core/process/cmd_darwin.go create mode 100644 dev-tools/mage/target/srvrlesstest/core/process/cmd_linux.go create mode 100644 dev-tools/mage/target/srvrlesstest/core/process/external_unix.go create mode 100644 dev-tools/mage/target/srvrlesstest/core/process/external_windows.go create mode 100644 dev-tools/mage/target/srvrlesstest/core/process/job_unix.go create mode 100644 
dev-tools/mage/target/srvrlesstest/core/process/job_windows.go create mode 100644 dev-tools/mage/target/srvrlesstest/core/process/process.go create mode 100644 dev-tools/mage/target/srvrlesstest/define/batch.go create mode 100644 dev-tools/mage/target/srvrlesstest/define/requirements.go create mode 100644 dev-tools/mage/target/srvrlesstest/srvrlesstest.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/common/batch.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/common/build.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/common/config.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/common/instance.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/common/logger.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/common/prefix_output.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/common/runner.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/common/stack.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/common/supported.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/define/batch.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/define/requirements.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/ess/client.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/ess/config.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/ess/create_deployment_csp_configuration.yaml create mode 100644 dev-tools/mage/target/srvrlesstest/testing/ess/create_deployment_request.tmpl.json create mode 100644 dev-tools/mage/target/srvrlesstest/testing/ess/deployment.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/ess/serverless.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/ess/serverless_provisioner.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/ess/statful_provisioner.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/fetcher.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/kubernetes/image.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/kubernetes/kind/provisioner.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/kubernetes/runner.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/kubernetes/supported.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/linux/debian.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/linux/linux.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/linux/rhel.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/log.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/multipas/provisioner.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/ogc/api.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/ogc/config.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/ogc/provisioner.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/ogc/supported.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/runner/archiver.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/runner/json.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/runner/junit.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/runner/runner.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/runner/utils.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/ssh/client.go create mode 100644 
dev-tools/mage/target/srvrlesstest/testing/ssh/file.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/ssh/interface.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/ssh/keys.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/supported/batch.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/supported/supported.go create mode 100644 dev-tools/mage/target/srvrlesstest/testing/windows/windows.go create mode 100644 dev-tools/mage/target/srvrlesstest/utils/root_unix.go create mode 100644 dev-tools/mage/target/srvrlesstest/utils/root_windows.go create mode 100644 dev-tools/mage/target/srvrlesstest/utils/root_windows_test.go create mode 100644 x-pack/agentbeat/testing/integration/README.md create mode 100644 x-pack/agentbeat/testing/integration/agent_long_running_leak_test.go create mode 100644 x-pack/agentbeat/testing/integration/agent_long_test_apache.json create mode 100644 x-pack/agentbeat/testing/integration/agent_long_test_base_system_integ.json create mode 100644 x-pack/agentbeat/testing/integration/apm_propagation_test.go create mode 100644 x-pack/agentbeat/testing/integration/beats_serverless_test.go create mode 100644 x-pack/agentbeat/testing/integration/container_cmd_test.go create mode 100644 x-pack/agentbeat/testing/integration/delay_enroll_test.go create mode 100644 x-pack/agentbeat/testing/integration/diagnostics_test.go create mode 100644 x-pack/agentbeat/testing/integration/endpoint_security_package.json.tmpl create mode 100644 x-pack/agentbeat/testing/integration/endpoint_security_test.go create mode 100644 x-pack/agentbeat/testing/integration/endpoint_test_tools.go create mode 100644 x-pack/agentbeat/testing/integration/event_logging_test.go create mode 100644 x-pack/agentbeat/testing/integration/fake_test.go create mode 100644 x-pack/agentbeat/testing/integration/fakes.go create mode 100644 x-pack/agentbeat/testing/integration/fleet-server.json create mode 100644 x-pack/agentbeat/testing/integration/fleetserver_test.go create mode 100644 x-pack/agentbeat/testing/integration/fqdn_test.go create mode 100644 x-pack/agentbeat/testing/integration/groups_test.go create mode 100644 x-pack/agentbeat/testing/integration/inspect_test.go create mode 100644 x-pack/agentbeat/testing/integration/install_test.go create mode 100644 x-pack/agentbeat/testing/integration/kubernetes_agent_service_test.go create mode 100644 x-pack/agentbeat/testing/integration/kubernetes_agent_standalone_test.go create mode 100644 x-pack/agentbeat/testing/integration/linux_deb_test.go create mode 100644 x-pack/agentbeat/testing/integration/linux_rpm_test.go create mode 100644 x-pack/agentbeat/testing/integration/log_level_test.go create mode 100644 x-pack/agentbeat/testing/integration/logs_ingestion_test.go create mode 100644 x-pack/agentbeat/testing/integration/metrics_monitoring_test.go create mode 100644 x-pack/agentbeat/testing/integration/monitoring_endpoint_test.go create mode 100644 x-pack/agentbeat/testing/integration/monitoring_probe_preserve_text_cfg_test.go create mode 100644 x-pack/agentbeat/testing/integration/monitoring_probe_reload_test.go create mode 100644 x-pack/agentbeat/testing/integration/otel_test.go create mode 100644 x-pack/agentbeat/testing/integration/package_version_test.go create mode 100644 x-pack/agentbeat/testing/integration/pkgversion_common_test.go create mode 100644 x-pack/agentbeat/testing/integration/proxy_url_test.go create mode 100644 x-pack/agentbeat/testing/integration/switch_privileged_test.go create mode 100644 
x-pack/agentbeat/testing/integration/switch_unprivileged_test.go create mode 100644 x-pack/agentbeat/testing/integration/system_integration_setup.json create mode 100644 x-pack/agentbeat/testing/integration/testdata/.upgrade-test-agent-versions.yml create mode 100644 x-pack/agentbeat/testing/integration/testdata/connectors.agent.yml create mode 100644 x-pack/agentbeat/testing/integration/upgrade_broken_package_test.go create mode 100644 x-pack/agentbeat/testing/integration/upgrade_fleet_test.go create mode 100644 x-pack/agentbeat/testing/integration/upgrade_gpg_test.go create mode 100644 x-pack/agentbeat/testing/integration/upgrade_rollback_test.go create mode 100644 x-pack/agentbeat/testing/integration/upgrade_standalone_inprogress_test.go create mode 100644 x-pack/agentbeat/testing/integration/upgrade_standalone_retry_test.go create mode 100644 x-pack/agentbeat/testing/integration/upgrade_standalone_same_commit_test.go create mode 100644 x-pack/agentbeat/testing/integration/upgrade_standalone_test.go create mode 100644 x-pack/agentbeat/testing/integration/upgrade_uninstall_test.go diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command index 5cb0722edd84..884c413cdb56 100644 --- a/.buildkite/hooks/pre-command +++ b/.buildkite/hooks/pre-command @@ -21,8 +21,8 @@ fi if [[ "$BUILDKITE_PIPELINE_SLUG" == "beats-xpack-agentbeat" && "$BUILDKITE_STEP_KEY" == *"agentbeat-it"* ]]; then out=$(.buildkite/scripts/agentbeat/setup_agentbeat.py) echo "$out" - AGENTBEAT_PATH=$(echo "$out" | tail -n 1) - export AGENTBEAT_PATH + AGENT_BUILD_DIR=$(echo "$out" | tail -n 1) + export AGENT_BUILD_DIR fi if [[ "$BUILDKITE_PIPELINE_SLUG" == "auditbeat" || \ diff --git a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml index 1687aa25d922..2fb2b7654af1 100644 --- a/.buildkite/x-pack/pipeline.xpack.agentbeat.yml +++ b/.buildkite/x-pack/pipeline.xpack.agentbeat.yml @@ -1,43 +1,31 @@ env: ASDF_MAGE_VERSION: 1.15.0 - ASDF_NODEJS_VERSION: 18.17.1 - GCP_HI_PERF_MACHINE_TYPE: "c2d-highcpu-16" IMAGE_UBUNTU_X86_64: "family/platform-ingest-beats-ubuntu-2204" - AWS_ARM_INSTANCE_TYPE: "m6g.xlarge" - AWS_IMAGE_UBUNTU_ARM_64: "platform-ingest-beats-ubuntu-2204-aarch64" - - IMAGE_MACOS_ARM: "generic-13-ventura-arm" - IMAGE_MACOS_X86_64: "generic-13-ventura-x64" - - IMAGE_WIN_2022: "family/platform-ingest-beats-windows-2022" - IMAGE_BEATS_WITH_HOOKS_LATEST: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-beats-ci-with-hooks:latest" - AGENTBEAT_SPEC: "./agentbeat.spec.yml" - steps: - - group: "Check/Update" - key: "x-pack-agentbeat-check-update" - - steps: - - label: "agentbeat: Run pre-commit" - command: "pre-commit run --all-files" - agents: - image: "${IMAGE_BEATS_WITH_HOOKS_LATEST}" - memory: "2Gi" - useCustomGlobalHooks: true - notify: - - github_commit_status: - context: "agentbeat: pre-commit" - - - wait: ~ - # with PRs, we want to run mandatory tests only if check/update step succeed - # for other cases, e.g. 
merge commits, we want to run mandatory tests (and publish) independently of other tests
+#    # this allows building DRA artifacts even if there is flakiness in check/update step
+#    if: build.env("BUILDKITE_PULL_REQUEST") != "false"
+#    depends_on: "x-pack-agentbeat-check-update"

   - group: "Agentbeat tests"
     key: "agentbeat-mandatory-tests"
@@ -47,6 +35,7 @@ steps:
         key: "agentbeat-package-linux"
         env:
           PLATFORMS: "+all linux/amd64 linux/arm64 windows/amd64 darwin/amd64 darwin/arm64"
+          PACKAGES: tar.gz,zip
           SNAPSHOT: true
         command: |
           set -euo pipefail
@@ -70,93 +59,87 @@ steps:
         - github_commit_status:
             context: "agentbeat: Packaging"

-  - label: ":ubuntu: x-pack/agentbeat: Ubuntu x86_64 Spec tests"
-    key: "agentbeat-it-linux-x86-64"
-    env:
-      PLATFORM: "linux/amd64"
+#  - label: ":linux: Agentbeat/Integration tests Linux"
+#    key: "agentbeat-it-linux"
+#    depends_on:
+#      - agentbeat-package-linux
+#    env:
+#      ASDF_NODEJS_VERSION: 18.17.1
+#      PLATFORMS: "+all linux/amd64 linux/arm64 windows/amd64 darwin/amd64 darwin/arm64"
+#      SNAPSHOT: true
+#    command: |
+#      set -euo pipefail
+#      echo "~~~ Downloading artifacts"
+#      buildkite-agent artifact download x-pack/agentbeat/build/distributions/** . 
--step 'agentbeat-package-linux'
+#      ls -lah x-pack/agentbeat/build/distributions/
+#      echo "~~~ Installing @elastic/synthetics with npm"
+#      npm install -g @elastic/synthetics
+#      echo "~~~ Running tests"
+#      cd x-pack/agentbeat
+#      mage goIntegTest
+#    artifact_paths:
+#      - x-pack/agentbeat/build/distributions/**/*
+#      - "x-pack/agentbeat/build/*.xml"
+#      - "x-pack/agentbeat/build/*.json"
+#    plugins:
+#      - test-collector#v1.10.2:
+#          files: "x-pack/agentbeat/build/TEST-*.xml"
+#          format: "junit"
+#          branches: "main"
+#          debug: true
+#    retry:
+#      automatic:
+#        - limit: 1
+#    timeout_in_minutes: 60
+#    agents:
+#      provider: "gcp"
+#      image: "${IMAGE_UBUNTU_X86_64}"
+#      machineType: "${GCP_HI_PERF_MACHINE_TYPE}"
+#      disk_size: 100
+#      disk_type: "pd-ssd"
+#    notify:
+#      - github_commit_status:
+#          context: "agentbeat: Integration tests"
+
+  - group: "Agentbeat: Serverless Tests"
+    key: "agentbeat-serverless-tests"
+
+    steps:
+      - label: ":ubuntu: Serverless tests"
+        key: "agentbeat-it-serverless"
         depends_on:
           - agentbeat-package-linux
+        env:
+          AGENT_STACK_VERSION: "8.16.0-SNAPSHOT"
+          TEST_INTEG_AUTH_GCP_DATACENTER: "us-central1-a"
+          GOFLAGS: "-buildvcs=false"
+          TEST_INTEG_CLEAN_ON_EXIT: true
+          TEST_PLATFORMS: "linux/amd64"
+          SNAPSHOT: true
         command: |
           cd x-pack/agentbeat
-          mage -v testWithSpec
+          mage serverlessTest metricbeat
+        artifact_paths:
+          - x-pack/agentbeat/build/TEST-**
+          - x-pack/agentbeat/build/diagnostics/*
+        plugins:
+          - test-collector#v1.10.2:
+              files: "x-pack/agentbeat/build/TEST-*.xml"
+              format: "junit"
+              branches: "main"
+              debug: true
+        retry:
+          automatic:
+            - limit: 1
+        timeout_in_minutes: 60
+        concurrency_group: elastic-agent-extended-testing/serverless-integration
+        concurrency: 8
         agents:
           provider: "gcp"
           image: "${IMAGE_UBUNTU_X86_64}"
-          machineType: "${GCP_HI_PERF_MACHINE_TYPE}"
+          machineType: "${GCP_STANDARD_MACHINE_TYPE}"
           disk_size: 100
           disk_type: "pd-ssd"
         notify:
           - github_commit_status:
-            context: "agentbeat: Ubuntu x86_64 Spec tests"
-
-  - label: ":ubuntu: x-pack/agentbeat: Ubuntu arm64 Spec tests"
-    key: "agentbeat-it-linux-arm64"
-    env:
-      PLATFORM: "linux/arm64"
-    depends_on:
-      - agentbeat-package-linux
-    command: |
-      cd x-pack/agentbeat
-      mage -v testWithSpec
-    agents:
-      provider: "aws"
-      imagePrefix: "${AWS_IMAGE_UBUNTU_ARM_64}"
-      instanceType: "${AWS_ARM_INSTANCE_TYPE}"
-    notify:
-      - github_commit_status:
-          context: "agentbeat: Ubuntu arm64 Spec tests"
-
-  - label: ":windows: x-pack/agentbeat: Windows x86_64 Spec tests"
-    key: "agentbeat-it-windows"
-    env:
-      PLATFORM: "windows/amd64"
-    depends_on:
-      - agentbeat-package-linux
-    command: |
-      Set-Location -Path x-pack/agentbeat
-      mage -v testWithSpec
-    agents:
-      provider: "gcp"
-      image: "${IMAGE_WIN_2022}"
-      machine_type: "${GCP_WIN_MACHINE_TYPE}"
-      disk_size: 200
-      disk_type: "pd-ssd"
-    notify:
-      - github_commit_status:
-          context: "agentbeat: Windows x86_64 Spec tests"
-
-  - label: ":macos: x-pack/agentbeat: macOS x86_64 Spec tests"
-    key: "agentbeat-it-macos-x86-64"
-    depends_on:
-      - agentbeat-package-linux
-    env:
-      PLATFORM: "darwin/amd64"
-    command: |
-      set -euo pipefail
-      source 
.buildkite/scripts/install_macos_tools.sh - cd x-pack/agentbeat - mage -v testWithSpec - agents: - provider: "orka" - imagePrefix: "${IMAGE_MACOS_ARM}" - notify: - - github_commit_status: - context: "agentbeat: macOS arm64 Spec tests" + context: "agentbeat: Serverless tests" diff --git a/dev-tools/mage/agentbeat-serverless.go b/dev-tools/mage/agentbeat-serverless.go new file mode 100644 index 000000000000..3b369f42c324 --- /dev/null +++ b/dev-tools/mage/agentbeat-serverless.go @@ -0,0 +1,56 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package mage + +import ( + "fmt" + "log" + "os" +) + +// TestBeatServerless todo description +func TestBeatServerless(beat string) { + if beat == "" { + log.Fatal("Beat is not defined") + } + + if os.Getenv("AGENT_BUILD_DIR") == "" { + log.Fatal("AGENT_BUILD_DIR is not defined") + } + + setStackProvisioner() + setTestBinaryName(beat) + +} + +func setStackProvisioner() { + stackProvisioner := os.Getenv("STACK_PROVISIONER") + if stackProvisioner == "" { + if err := os.Setenv("STACK_PROVISIONER", "serverless"); err != nil { + log.Fatal("error setting serverless stack var: %w", err) + } + } else if stackProvisioner == "stateful" { + fmt.Println("--- Warning: running TestBeatServerless as stateful") + } +} + +func setTestBinaryName(beat string) { + if err := os.Setenv("TEST_BINARY_NAME", beat); err != nil { + log.Fatal("error setting binary name: %w", err) + } +} diff --git a/dev-tools/mage/gotest.go b/dev-tools/mage/gotest.go index ecc8f277b941..efed67fdea6b 100644 --- a/dev-tools/mage/gotest.go +++ b/dev-tools/mage/gotest.go @@ -428,3 +428,30 @@ func BuildSystemTestGoBinary(binArgs TestBinaryArgs) error { }() return sh.RunV("go", args...) } + +func GoTestBuild(ctx context.Context, params GoTestArgs) error { + if params.OutputFile == "" { + return fmt.Errorf("missing output file") + } + + fmt.Println(">> go test:", params.TestName, "Building Test Binary") + + args := []string{"test", "-c", "-o", params.OutputFile} + + if len(params.Tags) > 0 { + params := strings.Join(params.Tags, " ") + if params != "" { + args = append(args, "-tags", params) + } + } + + args = append(args, params.Packages...) + + goTestBuild := makeCommand(ctx, params.Env, "go", args...) + + err := goTestBuild.Run() + if err != nil { + return err + } + return nil +} diff --git a/dev-tools/mage/spec.go b/dev-tools/mage/spec.go deleted file mode 100644 index 03c733f1dd6d..000000000000 --- a/dev-tools/mage/spec.go +++ /dev/null @@ -1,100 +0,0 @@ -// Licensed to Elasticsearch B.V. under one or more contributor -// license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright -// ownership. Elasticsearch B.V. 
licenses this file to you under -// the Apache License, Version 2.0 (the "License"); you may -// not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package mage - -import ( - "gopkg.in/yaml.v2" - - "log" - "os" - "strings" -) - -type spec struct { - Inputs []input -} - -type input struct { - Name string - Description string - Platforms []string - Command command -} - -func (i *input) GetCommand() string { - return strings.Join(i.Command.Args, " ") -} - -type command struct { - Name string - Args []string -} - -// SpecCommands parses agent.beat.spec.yml and collects commands for tests -func SpecCommands(specPath string, platform string) []string { - spec, _ := parseToObj(specPath) - - filteredInputs := filter(spec.Inputs, func(input input) bool { - return contains(input.Platforms, platform) - }) - - commands := make(map[string]interface{}) - for _, i := range filteredInputs { - commands[i.GetCommand()] = nil - } - keys := make([]string, 0, len(commands)) - for k := range commands { - keys = append(keys, k) - } - - return keys -} - -func parseToObj(path string) (spec, error) { - specFile, err := os.ReadFile(path) - if err != nil { - log.Fatalf("Error opening agentbeat.spec.yml: %v", err) - return spec{}, err - } - var spec spec - err = yaml.Unmarshal(specFile, &spec) - if err != nil { - log.Fatalf("Error parsing agentbeat.spec.yml: %v", err) - return spec, err - } - return spec, nil -} - -func filter[T any](slice []T, condition func(T) bool) []T { - var result []T - for _, v := range slice { - if condition(v) { - result = append(result, v) - } - } - return result -} - -func contains(slice []string, item string) bool { - for _, v := range slice { - if v == item { - return true - } - } - return false -} diff --git a/dev-tools/mage/target/srvrlesstest/component/platforms.go b/dev-tools/mage/target/srvrlesstest/component/platforms.go new file mode 100644 index 000000000000..7825b3e5fdcd --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/component/platforms.go @@ -0,0 +1,175 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
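+
+// Package component describes the platforms (OS/arch pairs) that a component
+// can support and detects the details of the platform it is running on.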
+ +package component + +import ( + "fmt" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/utils" + goruntime "runtime" + "strings" + + "github.com/elastic/go-sysinfo" +) + +const ( + // Container represents running inside a container + Container = "container" + // Darwin represents running on Mac OSX + Darwin = "darwin" + // Linux represents running on Linux + Linux = "linux" + // Windows represents running on Windows + Windows = "windows" +) + +const ( + // AMD64 represents the amd64 architecture + AMD64 = "amd64" + // ARM64 represents the arm64 architecture + ARM64 = "arm64" +) + +// Platform defines the platform that a component can support +type Platform struct { + OS string + Arch string + GOOS string +} + +// Platforms is an array of platforms. +type Platforms []Platform + +// GlobalPlatforms defines the platforms that a component can support +var GlobalPlatforms = Platforms{ + { + OS: Container, + Arch: AMD64, + GOOS: Linux, + }, + { + OS: Container, + Arch: ARM64, + GOOS: Linux, + }, + { + OS: Darwin, + Arch: AMD64, + GOOS: Darwin, + }, + { + OS: Darwin, + Arch: ARM64, + GOOS: Darwin, + }, + { + OS: Linux, + Arch: AMD64, + GOOS: Linux, + }, + { + OS: Linux, + Arch: ARM64, + GOOS: Linux, + }, + { + OS: Windows, + Arch: AMD64, + GOOS: Windows, + }, +} + +// String returns the platform string identifier. +func (p *Platform) String() string { + return fmt.Sprintf("%s/%s", p.OS, p.Arch) +} + +// Exists returns true if the +func (p Platforms) Exists(platform string) bool { + pieces := strings.SplitN(platform, "/", 2) + if len(pieces) != 2 { + return false + } + for _, platform := range p { + if platform.OS == pieces[0] && platform.Arch == pieces[1] { + return true + } + } + return false +} + +// UserDetail provides user specific information on the running platform. +type UserDetail struct { + Root bool +} + +// PlatformDetail is platform that has more detail information about the running platform. +type PlatformDetail struct { + Platform + + NativeArch string + Family string + Major int + Minor int + + User UserDetail +} + +// PlatformModifier can modify the platform details before the runtime specifications are loaded. +type PlatformModifier func(detail PlatformDetail) PlatformDetail + +// LoadPlatformDetail loads the platform details for the current system. 
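+// Any modifiers are applied, in order, after detection, so callers can
+// override detected fields (for example, pinning OS or Arch for a test).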
+func LoadPlatformDetail(modifiers ...PlatformModifier) (PlatformDetail, error) { + hasRoot, err := utils.HasRoot() + if err != nil { + return PlatformDetail{}, err + } + info, err := sysinfo.Host() + if err != nil { + return PlatformDetail{}, err + } + os := info.Info().OS + nativeArch := info.Info().NativeArchitecture + if nativeArch == "x86_64" { + // go-sysinfo Architecture and NativeArchitecture prefer x64_64 + // but GOARCH prefers amd64 + nativeArch = "amd64" + } + if nativeArch == "aarch64" { + // go-sysinfo Architecture and NativeArchitecture prefer aarch64 + // but GOARCH prefers arm64 + nativeArch = "arm64" + } + detail := PlatformDetail{ + Platform: Platform{ + OS: goruntime.GOOS, + Arch: goruntime.GOARCH, + GOOS: goruntime.GOOS, + }, + NativeArch: nativeArch, + Family: os.Family, + Major: os.Major, + Minor: os.Minor, + User: UserDetail{ + Root: hasRoot, + }, + } + for _, modifier := range modifiers { + detail = modifier(detail) + } + return detail, nil +} diff --git a/dev-tools/mage/target/srvrlesstest/core/process/cmd.go b/dev-tools/mage/target/srvrlesstest/core/process/cmd.go new file mode 100644 index 000000000000..fc5f262e6626 --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/core/process/cmd.go @@ -0,0 +1,49 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//go:build !linux && !darwin + +package process + +import ( + "context" + "os" + "os/exec" + "path/filepath" +) + +func getCmd(ctx context.Context, path string, env []string, uid, gid int, arg ...string) (*exec.Cmd, error) { + var cmd *exec.Cmd + if ctx == nil { + cmd = exec.Command(path, arg...) + } else { + cmd = exec.CommandContext(ctx, path, arg...) + } + cmd.Env = append(cmd.Env, os.Environ()...) + cmd.Env = append(cmd.Env, env...) + cmd.Dir = filepath.Dir(path) + + return cmd, nil +} + +func killCmd(proc *os.Process) error { + return proc.Kill() +} + +func terminateCmd(proc *os.Process) error { + return proc.Kill() +} diff --git a/dev-tools/mage/target/srvrlesstest/core/process/cmd_darwin.go b/dev-tools/mage/target/srvrlesstest/core/process/cmd_darwin.go new file mode 100644 index 000000000000..9a5be3e1beba --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/core/process/cmd_darwin.go @@ -0,0 +1,67 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//go:build darwin + +package process + +import ( + "context" + "fmt" + "math" + "os" + "os/exec" + "path/filepath" + "syscall" +) + +func getCmd(ctx context.Context, path string, env []string, uid, gid int, arg ...string) (*exec.Cmd, error) { + var cmd *exec.Cmd + if ctx == nil { + cmd = exec.Command(path, arg...) + } else { + cmd = exec.CommandContext(ctx, path, arg...) + } + cmd.Env = append(cmd.Env, os.Environ()...) + cmd.Env = append(cmd.Env, env...) + cmd.Dir = filepath.Dir(path) + if isInt32(uid) && isInt32(gid) { + cmd.SysProcAttr = &syscall.SysProcAttr{ + Credential: &syscall.Credential{ + Uid: uint32(uid), + Gid: uint32(gid), + NoSetGroups: true, + }, + } + } else { + return nil, fmt.Errorf("invalid uid: '%d' or gid: '%d'", uid, gid) + } + + return cmd, nil +} + +func isInt32(val int) bool { + return val >= 0 && val <= math.MaxInt32 +} + +func killCmd(proc *os.Process) error { + return proc.Kill() +} + +func terminateCmd(proc *os.Process) error { + return proc.Signal(syscall.SIGTERM) +} diff --git a/dev-tools/mage/target/srvrlesstest/core/process/cmd_linux.go b/dev-tools/mage/target/srvrlesstest/core/process/cmd_linux.go new file mode 100644 index 000000000000..3a480128ffb2 --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/core/process/cmd_linux.go @@ -0,0 +1,70 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//go:build linux + +package process + +import ( + "context" + "fmt" + "math" + "os" + "os/exec" + "path/filepath" + "syscall" +) + +func getCmd(ctx context.Context, path string, env []string, uid, gid int, arg ...string) (*exec.Cmd, error) { + var cmd *exec.Cmd + if ctx == nil { + cmd = exec.Command(path, arg...) + } else { + cmd = exec.CommandContext(ctx, path, arg...) + } + cmd.Env = append(cmd.Env, os.Environ()...) + cmd.Env = append(cmd.Env, env...) 
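+	// run the child from the directory that contains the binary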
+	cmd.Dir = filepath.Dir(path)
+	if isInt32(uid) && isInt32(gid) {
+		cmd.SysProcAttr = &syscall.SysProcAttr{
+			// On shutdown all sub-processes are sent SIGTERM. In the case that the Agent
+			// dies or is killed with -9, the kernel then also kills the children
+			// (Pdeathsig is only supported on Linux).
+			Pdeathsig: syscall.SIGKILL,
+			Credential: &syscall.Credential{
+				Uid:         uint32(uid),
+				Gid:         uint32(gid),
+				NoSetGroups: true,
+			},
+		}
+	} else {
+		return nil, fmt.Errorf("invalid uid: '%d' or gid: '%d'", uid, gid)
+	}
+
+	return cmd, nil
+}
+
+func isInt32(val int) bool {
+	return val >= 0 && val <= math.MaxInt32
+}
+
+func killCmd(proc *os.Process) error {
+	return proc.Kill()
+}
+
+func terminateCmd(proc *os.Process) error {
+	return proc.Signal(syscall.SIGTERM)
+}
diff --git a/dev-tools/mage/target/srvrlesstest/core/process/external_unix.go b/dev-tools/mage/target/srvrlesstest/core/process/external_unix.go
new file mode 100644
index 000000000000..6563556b3ee6
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/core/process/external_unix.go
@@ -0,0 +1,42 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//go:build !windows
+
+package process
+
+import (
+	"os"
+	"syscall"
+	"time"
+)
+
+// externalProcess is a watch mechanism used in cases where the OS requires a
+// process to be a child in order to wait on it. We need to be able to await
+// any process.
+func externalProcess(proc *os.Process) {
+	if proc == nil {
+		return
+	}
+
+	for {
+		<-time.After(1 * time.Second)
+		if proc.Signal(syscall.Signal(0)) != nil {
+			// failed to contact process, return
+			return
+		}
+	}
+}
diff --git a/dev-tools/mage/target/srvrlesstest/core/process/external_windows.go b/dev-tools/mage/target/srvrlesstest/core/process/external_windows.go
new file mode 100644
index 000000000000..8d3aa616964c
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/core/process/external_windows.go
@@ -0,0 +1,65 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//go:build windows
+
+package process
+
+import (
+	"os"
+	"syscall"
+	"time"
+)
+
+const (
+	// exitCodeStillActive according to docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-getexitcodeprocess
+	exitCodeStillActive = 259
+)
+
+// externalProcess is a watch mechanism used in cases where the OS requires a
+// process to be a child in order to wait on it. We need to be able to await
+// any process.
+func externalProcess(proc *os.Process) {
+	if proc == nil {
+		return
+	}
+
+	for {
+		<-time.After(1 * time.Second)
+		if isWindowsProcessExited(proc.Pid) {
+			return
+		}
+	}
+}
+
+func isWindowsProcessExited(pid int) bool {
+	const desiredAccess = syscall.STANDARD_RIGHTS_READ | syscall.PROCESS_QUERY_INFORMATION | syscall.SYNCHRONIZE
+	h, err := syscall.OpenProcess(desiredAccess, false, uint32(pid))
+	if err != nil {
+		// failed to open handle, report exited
+		return true
+	}
+	// close the handle so repeated polling does not leak handles
+	defer func() {
+		_ = syscall.CloseHandle(h)
+	}()
+
+	// Get the exit code; this returns immediately. If the process is still
+	// running, it reports exitCodeStillActive.
+	var ec uint32
+	if err := syscall.GetExitCodeProcess(h, &ec); err != nil {
+		// failed to contact, report exited
+		return true
+	}
+
+	return ec != exitCodeStillActive
+}
diff --git a/dev-tools/mage/target/srvrlesstest/core/process/job_unix.go b/dev-tools/mage/target/srvrlesstest/core/process/job_unix.go
new file mode 100644
index 000000000000..37a030e41591
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/core/process/job_unix.go
@@ -0,0 +1,52 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//go:build !windows
+
+package process
+
+import (
+	"os"
+)
+
+// Job is noop on unix
+type Job int
+
+var (
+	// JobObject is public global JobObject, 0 value on linux
+	JobObject Job
+)
+
+// CreateJobObject returns a job object.
+func CreateJobObject() (pj Job, err error) {
+	return pj, err
+}
+
+// NewJob is noop on unix
+func NewJob() (Job, error) {
+	return 0, nil
+}
+
+// Close is noop on unix
+func (job Job) Close() error {
+	return nil
+}
+
+// Assign is noop on unix
+func (job Job) Assign(p *os.Process) error {
+	return nil
+}
diff --git a/dev-tools/mage/target/srvrlesstest/core/process/job_windows.go b/dev-tools/mage/target/srvrlesstest/core/process/job_windows.go
new file mode 100644
index 000000000000..332f3ed4f27f
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/core/process/job_windows.go
@@ -0,0 +1,100 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//go:build windows
+
+package process
+
+import (
+	"os"
+	"unsafe"
+
+	"golang.org/x/sys/windows"
+)
+
+// Job is a wrapper for a Windows JobObject
+// https://docs.microsoft.com/en-us/windows/win32/procthread/job-objects
+// This helper guarantees a clean process tree kill on job handle close
+type Job windows.Handle
+
+var (
+	// Public global JobObject should be initialized once in main
+	JobObject Job
+)
+
+// CreateJobObject creates a JobObject on Windows, global per process
+// Should only be initialized once in main function
+func CreateJobObject() (pj Job, err error) {
+	if pj, err = NewJob(); err != nil {
+		return pj, err
+	}
+	JobObject = pj
+	return pj, nil
+}
+
+// NewJob creates an instance of the JobObject
+func NewJob() (Job, error) {
+	h, err := windows.CreateJobObject(nil, nil)
+	if err != nil {
+		return 0, err
+	}
+
+	// From https://docs.microsoft.com/en-us/windows/win32/procthread/job-objects
+	// ... if the job has the JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE flag specified,
+	// closing the last job object handle terminates all associated processes
+	// and then destroys the job object itself.
+	// If a nested job has the JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE flag specified,
+	// closing the last job object handle terminates all processes associated
+	// with the job and its child jobs in the hierarchy.
+	info := windows.JOBOBJECT_EXTENDED_LIMIT_INFORMATION{
+		BasicLimitInformation: windows.JOBOBJECT_BASIC_LIMIT_INFORMATION{
+			LimitFlags: windows.JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE,
+		},
+	}
+	if _, err := windows.SetInformationJobObject(
+		h,
+		windows.JobObjectExtendedLimitInformation,
+		uintptr(unsafe.Pointer(&info)),
+		uint32(unsafe.Sizeof(info))); err != nil {
+		return 0, err
+	}
+
+	return Job(h), nil
+}
+
+// Close closes the job handle
+func (job Job) Close() error {
+	if job == 0 {
+		return nil
+	}
+	return windows.CloseHandle(windows.Handle(job))
+}
+
+// Assign assigns the process to the JobObject
+func (job Job) Assign(p *os.Process) error {
+	if job == 0 || p == nil {
+		return nil
+	}
+	return windows.AssignProcessToJobObject(
+		windows.Handle(job),
+		windows.Handle((*process)(unsafe.Pointer(p)).Handle))
+}
+
+type process struct {
+	Pid    int
+	Handle uintptr
+}
diff --git a/dev-tools/mage/target/srvrlesstest/core/process/process.go b/dev-tools/mage/target/srvrlesstest/core/process/process.go
new file mode 100644
index 000000000000..e2d60efa1c7e
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/core/process/process.go
@@ -0,0 +1,197 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package process
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+)
+
+// Info groups information about a freshly started process
+type Info struct {
+	PID     int
+	Process *os.Process
+	Stdin   io.WriteCloser
+	Stderr  io.ReadCloser
+}
+
+// CmdOption is an option func to change the underlying command
+type CmdOption func(c *exec.Cmd) error
+
+// StartConfig is the configuration for the process start, set by the StartOption functions
+type StartConfig struct {
+	ctx       context.Context
+	uid, gid  int
+	args, env []string
+	cmdOpts   []CmdOption
+}
+
+// StartOption is a start option function
+type StartOption func(cfg *StartConfig)
+
+// Start starts a new process
+func Start(path string, opts ...StartOption) (proc *Info, err error) {
+	// Apply options
+	c := StartConfig{
+		uid: os.Geteuid(),
+		gid: os.Getegid(),
+	}
+
+	for _, opt := range opts {
+		opt(&c)
+	}
+
+	return startContext(c.ctx, path, c.uid, c.gid, c.args, c.env, c.cmdOpts...)
+}
+
+// WithContext sets an optional context
+func WithContext(ctx context.Context) StartOption {
+	return func(cfg *StartConfig) {
+		cfg.ctx = ctx
+	}
+}
+
+// WithArgs sets arguments
+func WithArgs(args []string) StartOption {
+	return func(cfg *StartConfig) {
+		cfg.args = args
+	}
+}
+
+// WithEnv sets the environment variables
+func WithEnv(env []string) StartOption {
+	return func(cfg *StartConfig) {
+		cfg.env = env
+	}
+}
+
+// WithUID sets UID
+func WithUID(uid int) StartOption {
+	return func(cfg *StartConfig) {
+		cfg.uid = uid
+	}
+}
+
+// WithGID sets GID
+func WithGID(gid int) StartOption {
+	return func(cfg *StartConfig) {
+		cfg.gid = gid
+	}
+}
+
+// WithCmdOptions sets the exec.Cmd options
+func WithCmdOptions(cmdOpts ...CmdOption) StartOption {
+	return func(cfg *StartConfig) {
+		cfg.cmdOpts = cmdOpts
+	}
+}
+
+// WithWorkDir sets the cmd working directory
+func WithWorkDir(wd string) CmdOption {
+	return func(c *exec.Cmd) error {
+		c.Dir = wd
+		return nil
+	}
+}
+
+// Kill kills the process.
+func (i *Info) Kill() error {
+	return killCmd(i.Process)
+}
+
+// Stop stops the process cleanly.
+func (i *Info) Stop() error {
+	return terminateCmd(i.Process)
+}
+
+// StopWait stops the process and waits for it to exit.
+func (i *Info) StopWait() error {
+	err := i.Stop()
+	if err != nil {
+		return err
+	}
+	_, err = i.Process.Wait()
+	return err
+}
+
+// Wait returns a channel that will send process state once it exits. Each
+// call to Wait() creates a goroutine. Failure to read from the returned
+// channel will leak this goroutine.
+func (i *Info) Wait() <-chan *os.ProcessState {
+	ch := make(chan *os.ProcessState)
+
+	go func() {
+		procState, err := i.Process.Wait()
+		if err != nil {
+			// process is not a child - some OSes require the process to be a child
+			externalProcess(i.Process)
+		}
+		ch <- procState
+	}()
+
+	return ch
+}
+
+// startContext starts a new process with context. The context is optional and can be nil.
+func startContext(ctx context.Context, path string, uid, gid int, args []string, env []string, opts ...CmdOption) (*Info, error) {
+	cmd, err := getCmd(ctx, path, env, uid, gid, args...)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create command for %q: %w", path, err)
+	}
+	for _, o := range opts {
+		if err := o(cmd); err != nil {
+			return nil, fmt.Errorf("failed to set command option for %q: %w", path, err)
+		}
+	}
+	stdin, err := cmd.StdinPipe()
+	if err != nil {
+		return nil, fmt.Errorf("failed to create stdin for %q: %w", path, err)
+	}
+
+	var stderr io.ReadCloser
+	if cmd.Stderr == nil {
+		stderr, err = cmd.StderrPipe()
+		if err != nil {
+			return nil, fmt.Errorf("failed to create stderr for %q: %w", path, err)
+		}
+	}
+
+	// start process
+	if err := cmd.Start(); err != nil {
+		return nil, fmt.Errorf("failed to start %q: %w", path, err)
+	}
+
+	// Hook to JobObject on windows, noop on other platforms.
+	// This ties the application process's lifespan to the agent's,
+	// preventing orphaned beats processes from being left behind
+	// after the agent process gets killed.
+	if err := JobObject.Assign(cmd.Process); err != nil {
+		_ = killCmd(cmd.Process)
+		return nil, fmt.Errorf("failed job assignment %q: %w", path, err)
+	}
+
+	return &Info{
+		PID:     cmd.Process.Pid,
+		Process: cmd.Process,
+		Stdin:   stdin,
+		Stderr:  stderr,
+	}, err
+}
diff --git a/dev-tools/mage/target/srvrlesstest/define/batch.go b/dev-tools/mage/target/srvrlesstest/define/batch.go
new file mode 100644
index 000000000000..c8b6ac65a0e0
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/define/batch.go
@@ -0,0 +1,320 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package define
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"os/exec"
+	"path/filepath"
+	"strings"
+)
+
+// defaultOS is the set of OS entries used in the case that a requirement doesn't define any
+var defaultOS = []OS{
+	{
+		Type: Darwin,
+		Arch: AMD64,
+	},
+	{
+		Type: Darwin,
+		Arch: ARM64,
+	},
+	{
+		Type: Linux,
+		Arch: AMD64,
+	},
+	{
+		Type: Linux,
+		Arch: ARM64,
+	},
+	{
+		Type: Windows,
+		Arch: AMD64,
+	},
+}
+
+// Batch is a grouping of tests that all have the same requirements.
+type Batch struct {
+	// Group must be set on each test to define which group the test belongs to.
+	// Tests that are in the same group are executed on the same runner.
+	Group string `json:"group"`
+
+	// OS defines the operating systems this test batch needs.
+	OS OS `json:"os"`
+
+	// Stack defines the stack required for this batch.
+	Stack *Stack `json:"stack,omitempty"`
+
+	// Tests define the set of packages and tests that do not require sudo
+	// privileges to be performed.
+	Tests []BatchPackageTests `json:"tests"`
+
+	// SudoTests define the set of packages and tests that do require sudo
+	// privileges to be performed.
+	SudoTests []BatchPackageTests `json:"sudo_tests"`
+}
+
+// BatchPackageTests is a package and its tests that belong to a batch.
+type BatchPackageTests struct {
+	// Name is the package name.
+	Name string `json:"name"`
+	// Tests is the set of tests in the package.
+	Tests []BatchPackageTest `json:"tests"`
+}
+
+// BatchPackageTest is a specific test in a package.
+type BatchPackageTest struct {
+	// Name of the test.
+	Name string `json:"name"`
+	// Stack needed for test.
+	Stack bool `json:"stack"`
+}
+
+// DetermineBatches parses the package directory with the possible extra build
+// tags to determine the set of batches for the package.
+func DetermineBatches(dir string, testFlags string, buildTags ...string) ([]Batch, error) {
+	const (
+		defineMatcher = "define skip; requirements: "
+	)
+
+	// the 'define' build tag is added so that the `define.Require` skips and
+	// logs the requirements for each test.
+	buildTags = append(buildTags, "define")
+
+	// 'go test' wants a directory path to either be absolute or start with
+	// './' so it knows it's a directory and not a package.
+	if !filepath.IsAbs(dir) && !strings.HasPrefix(dir, "./") {
+		dir = "./" + dir
+	}
+
+	// run 'go test' and collect the JSON output to be parsed
+	// #nosec G204 -- test function code, it will be okay
+	cmdArgs := []string{"test", "-v", "--tags", strings.Join(buildTags, ","), "-json"}
+	if testFlags != "" {
+		flags := strings.Split(testFlags, " ")
+		cmdArgs = append(cmdArgs, flags...)
+	}
+
+	cmdArgs = append(cmdArgs, dir)
+	testCmd := exec.Command("go", cmdArgs...)
+	output, err := testCmd.Output()
+	if err != nil {
+		// format cmdArgs to make the error message more coherent
+		cmdArgs = append([]string{"go"}, cmdArgs...)
+
+		var errExit *exec.ExitError
+		if errors.As(err, &errExit) {
+			b := bytes.NewBuffer(errExit.Stderr)
+			b.Write(output)
+			output = b.Bytes()
+		}
+		return nil, fmt.Errorf(
+			"error running go test: (%w), got:\n\n%s\ntried to run: %v",
+			err, string(output), cmdArgs)
+	}
+
+	// parse each test and determine the batches that each test belongs in
+	var batches []Batch
+	sc := bufio.NewScanner(bytes.NewReader(output))
+	for sc.Scan() {
+		var tar testActionResult
+		err := json.Unmarshal([]byte(sc.Text()), &tar)
+		if err != nil {
+			return nil, err
+		}
+		if tar.Action == "output" && strings.Contains(tar.Output, defineMatcher) {
+			reqRaw := tar.Output[strings.Index(tar.Output, defineMatcher)+len(defineMatcher) : strings.LastIndex(tar.Output, "\n")]
+			var req Requirements
+			err := json.Unmarshal([]byte(reqRaw), &req)
+			if err != nil {
+				return nil, fmt.Errorf("failed to parse requirements JSON from test %s/%s: %w", tar.Package, tar.Test, err)
+			}
+			err = req.Validate()
+			if err != nil {
+				return nil, fmt.Errorf("parsed requirements are invalid for test %s/%s: %w", tar.Package, tar.Test, err)
+			}
+			batches = appendTest(batches, tar, req)
+		}
+	}
+	return batches, nil
+}
+
+func appendTest(batches []Batch, tar testActionResult, req Requirements) []Batch {
+	var set []OS
+	for _, o := range req.OS {
+		if o.Arch == "" {
+			set = append(set, OS{
+				Type:          o.Type,
+				Arch:          AMD64,
+				Version:       o.Version,
+				Distro:        o.Distro,
+				DockerVariant: o.DockerVariant,
+			})
+			if o.Type != Windows {
+				set = append(set, OS{
+					Type:          o.Type,
+					Arch:          ARM64,
+					Version:       o.Version,
+					Distro:        o.Distro,
+					DockerVariant: o.DockerVariant,
+				})
+			}
+		} else {
+			set = append(set, OS{
+				Type:          o.Type,
+				Arch:          o.Arch,
+				Version:       o.Version,
+				Distro:        o.Distro,
+				DockerVariant: o.DockerVariant,
+			})
+		}
+	}
+	if len(set) == 0 {
+		// no OS defined; means the test supports all
+		set = defaultOS
+	}
+	for _, o := range set {
+		var batch Batch
+		batchIdx := findBatchIdx(batches, req.Group, o, req.Stack)
+		if batchIdx == -1 {
+			// new batch required
+			batch = Batch{
+				Group:     req.Group,
+				OS:        o,
+				Tests:     nil,
+				SudoTests: nil,
+			}
+			batches = append(batches, batch)
+			batchIdx = len(batches) - 1
+		}
+		batch = batches[batchIdx]
+		if o.Distro != "" {
+			batch.OS.Distro = o.Distro
+		}
+		if o.Version != "" {
+			batch.OS.Version = o.Version
+		}
+		if o.DockerVariant != "" {
+			batch.OS.DockerVariant = o.DockerVariant
+		}
+		if req.Stack != nil && batch.Stack == nil {
+			// assign the stack to this batch
+			batch.Stack = copyStack(req.Stack)
+		}
+		if req.Sudo {
+			batch.SudoTests = appendPackageTest(batch.SudoTests, tar.Package, tar.Test, req.Stack != nil)
+		} else {
+			batch.Tests = appendPackageTest(batch.Tests, tar.Package, tar.Test, req.Stack != nil)
+		}
+		batches[batchIdx] = batch
+	}
+	return batches
+}
+
+func appendPackageTest(tests []BatchPackageTests, pkg string, name string, stack bool) []BatchPackageTests {
+	for i, pt := range tests {
+		if pt.Name == pkg {
+			for _, testName := range pt.Tests {
+				if testName.Name == name {
+					// we already selected this test for this package for this batch,
+					// we can return immediately
+					return tests
+				}
+			}
+			pt.Tests = append(pt.Tests, BatchPackageTest{
+				Name:  name,
+				Stack: stack,
+			})
+			tests[i] = pt
+			return tests
+		}
+	}
+	var pt BatchPackageTests
+	pt.Name = pkg
+	pt.Tests = append(pt.Tests, BatchPackageTest{
+		Name:  name,
+		Stack: stack,
+	})
+	tests = append(tests, pt)
+	return tests
+}
+
+func findBatchIdx(batches []Batch, group string, os OS, stack *Stack) int {
+	for i, b := range batches {
+		if b.Group != group {
+			// must be in the same group
+			continue
+		}
+		if b.OS.Type != os.Type || b.OS.Arch != os.Arch {
+			// must be the same type and arch; both are always defined at this point
+			continue
+		}
+		if os.Distro != "" {
+			// must have the same distro
+			if b.OS.Distro != "" && b.OS.Distro != os.Distro {
+				continue
+			}
+		}
+		if os.Version != "" {
+			// must have the same version
+			if b.OS.Version != "" && b.OS.Version != os.Version {
+				continue
+			}
+		}
+		if os.DockerVariant != "" {
+			// must be the same docker image
+			if b.OS.DockerVariant != "" && b.OS.DockerVariant != os.DockerVariant {
+				continue
+			}
+		}
+		if stack == nil {
+			// don't care if the batch has a cloud or not
+			return i
+		}
+		if b.Stack == nil {
+			// need cloud, but batch doesn't have cloud; calling code can set it
+			return i
+		}
+		if b.Stack.Version == stack.Version {
+			// same cloud version; compatible
+			return i
+		}
+	}
+	return -1
+}
+
+func copyStack(stack *Stack) *Stack {
+	var s Stack
+	if stack != nil {
+		s = *stack
+		return &s
+	}
+	return nil
+}
+
+type testActionResult struct {
+	Time    string `json:"Time"`
+	Action  string `json:"Action"`
+	Package string `json:"Package"`
+	Test    string `json:"Test"`
+	Output  string `json:"Output"`
+}
diff --git a/dev-tools/mage/target/srvrlesstest/define/requirements.go b/dev-tools/mage/target/srvrlesstest/define/requirements.go
new file mode 100644
index 000000000000..793c80e10230
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/define/requirements.go
@@ -0,0 +1,179 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package define
+
+import (
+	"errors"
+	"fmt"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/component"
+)
+
+const (
+	// Default constant can be used as the default group for tests.
+	Default = "default"
+)
+
+const (
+	// Darwin is macOS platform
+	Darwin = component.Darwin
+	// Linux is Linux platform
+	Linux = component.Linux
+	// Windows is Windows platform
+	Windows = component.Windows
+	// Kubernetes is Kubernetes platform
+	Kubernetes = "kubernetes"
+)
+
+const (
+	// AMD64 is amd64 architecture
+	AMD64 = component.AMD64
+	// ARM64 is arm64 architecture
+	ARM64 = component.ARM64
+)
+
+// OS defines an operating system, architecture, version and distribution combination.
+type OS struct {
+	// Type is the operating system type (darwin, linux or windows).
+	//
+	// This is always required to be defined on the OS structure.
+	// If it is not defined the test runner will error.
+	Type string `json:"type"`
+	// Arch is the architecture type (amd64 or arm64).
+	//
+	// In the case that it's not provided the test will run on every
+	// architecture that is supported.
+	Arch string `json:"arch"`
+	// Version is a specific version of the OS type to run this test on
+	//
+	// When defined the test runs on this specific version only. When not
+	// defined the test is run on a selected version for this operating system.
+	Version string `json:"version"`
+	// Distro allows a specific Linux distribution to be selected to run on,
+	// for example "ubuntu". In the Kubernetes case it selects a specific
+	// distribution of Kubernetes, for example "kind".
+	Distro string `json:"distro"`
+	// DockerVariant allows a specific Docker image variant to be selected
+	// in the Kubernetes case, for example "wolfi".
+	DockerVariant string `json:"docker_variant"`
+}
+
+// Validate returns an error if not valid.
+func (o OS) Validate() error {
+	if o.Type == "" {
+		return errors.New("type must be defined")
+	}
+	if o.Type != Darwin && o.Type != Linux && o.Type != Windows && o.Type != Kubernetes {
+		return errors.New("type must be either darwin, linux, windows, or kubernetes")
+	}
+	if o.Arch != "" {
+		if o.Arch != AMD64 && o.Arch != ARM64 {
+			return errors.New("arch must be either amd64 or arm64")
+		}
+		if o.Type == Windows && o.Arch == ARM64 {
+			return errors.New("windows on arm64 not supported")
+		}
+	}
+	if o.Distro != "" && (o.Type != Linux && o.Type != Kubernetes) {
+		return errors.New("distro can only be set when type is linux or kubernetes")
+	}
+	if o.DockerVariant != "" && o.Type != Kubernetes {
+		return errors.New("docker variant can only be set when type is kubernetes")
+	}
+	return nil
+}
+
+// Stack defines the stack required for the test.
+type Stack struct {
+	// Version defines a specific stack version to create for this test.
+	//
+	// In the case that no version is provided the same version being used for
+	// the current test execution is used.
+	Version string `json:"version"`
+}
+
+// Requirements defines the testing requirements for the test to run.
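+//
+// An illustrative (hypothetical) payload, as DetermineBatches would parse it
+// from a "define skip; requirements: " line in the test output:
+//
+//	{"group":"default","os":[{"type":"linux","arch":"amd64"}],"sudo":true}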
+type Requirements struct {
+	// Group must be set on each test to define which group the test belongs to.
+	// Tests that are in the same group are executed on the same runner.
+	//
+	// Useful when tests take a long time to complete and sharding them across multiple
+	// hosts can improve the total amount of time to complete all the tests.
+	Group string `json:"group"`
+
+	// OS defines the operating systems this test can run on. In the case
+	// multiple are provided, the test is run multiple times, once on each
+	// combination.
+	OS []OS `json:"os,omitempty"`
+
+	// Stack defines the stack required for the test.
+	Stack *Stack `json:"stack,omitempty"`
+
+	// Local defines if this test can safely be performed on a local development machine.
+	// If not set then the test will not be performed when local-only testing is performed.
+	//
+	// This doesn't mean this test can only run locally. It will still run on defined OS's
+	// when a full test run is performed.
+	Local bool `json:"local"`
+
+	// Sudo defines that this test must run under superuser permissions. On Mac and Linux the
+	// test gets executed under sudo and on Windows it gets run under Administrator.
+	Sudo bool `json:"sudo"`
+}
+
+// Validate returns an error if not valid.
+func (r Requirements) Validate() error {
+	if r.Group == "" {
+		return errors.New("group is required")
+	}
+	for i, o := range r.OS {
+		if err := o.Validate(); err != nil {
+			return fmt.Errorf("invalid os %d: %w", i, err)
+		}
+	}
+	return nil
+}
+
+// runtimeAllowed returns true if the runtime matches a valid OS.
+func (r Requirements) runtimeAllowed(os string, arch string, version string, distro string) bool {
+	if len(r.OS) == 0 {
+		// all allowed
+		return true
+	}
+	for _, o := range r.OS {
+		if o.Type != Kubernetes && o.Type != os {
+			// not valid on this runtime
+			continue
+		}
+		if o.Arch != "" && o.Arch != arch {
+			// not allowed on specific architecture
+			continue
+		}
+		if o.Version != "" && o.Version != version {
+			// not allowed on specific version
+			continue
+		}
+		if o.Distro != "" && o.Distro != distro {
+			// not allowed on specific distro
+			continue
+		}
+		// allowed
+		return true
+	}
+	// made it this far, not allowed
+	return false
+}
diff --git a/dev-tools/mage/target/srvrlesstest/srvrlesstest.go b/dev-tools/mage/target/srvrlesstest/srvrlesstest.go
new file mode 100644
index 000000000000..21bfe2767c2f
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/srvrlesstest.go
@@ -0,0 +1,428 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+ +package srvrlesstest + +import ( + "context" + "fmt" + tcommon "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/common" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/ess" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/kubernetes/kind" + multipass "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/multipas" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/ogc" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/runner" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/elastic/beats/v7/dev-tools/mage" + "github.com/magefile/mage/mg" +) + +type ProvisionerType uint32 + +var ( + goIntegTestTimeout = 2 * time.Hour + goProvisionAndTestTimeout = goIntegTestTimeout + 30*time.Minute +) + +const ( + snapshotEnv = "SNAPSHOT" +) + +// Integration namespace contains tasks related to operating and running integration tests. +type Integration mg.Namespace + +func IntegRunner(ctx context.Context, matrix bool, singleTest string) error { + if _, ok := ctx.Deadline(); !ok { + // If the context doesn't have a timeout (usually via the mage -t option), give it one. + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, goProvisionAndTestTimeout) + defer cancel() + } + + for { + failedCount, err := integRunnerOnce(ctx, matrix, singleTest) + if err != nil { + return err + } + if failedCount > 0 { + if hasCleanOnExit() { + mg.Deps(Integration.Clean) + } + os.Exit(1) + } + if !hasRunUntilFailure() { + if hasCleanOnExit() { + mg.Deps(Integration.Clean) + } + return nil + } + } +} + +func hasCleanOnExit() bool { + clean := os.Getenv("TEST_INTEG_CLEAN_ON_EXIT") + b, _ := strconv.ParseBool(clean) + return b +} + +func hasRunUntilFailure() bool { + runUntil := os.Getenv("TEST_RUN_UNTIL_FAILURE") + b, _ := strconv.ParseBool(runUntil) + return b +} + +func integRunnerOnce(ctx context.Context, matrix bool, singleTest string) (int, error) { + goTestFlags := os.Getenv("GOTEST_FLAGS") + + batches, err := define.DetermineBatches("testing/integration", goTestFlags, "integration") + if err != nil { + return 0, fmt.Errorf("failed to determine batches: %w", err) + } + r, err := createTestRunner(matrix, singleTest, goTestFlags, batches...) 
+ if err != nil { + return 0, fmt.Errorf("error creating test runner: %w", err) + } + results, err := r.Run(ctx) + if err != nil { + return 0, fmt.Errorf("error running test: %w", err) + } + _ = os.Remove("build/TEST-go-integration.out") + _ = os.Remove("build/TEST-go-integration.out.json") + _ = os.Remove("build/TEST-go-integration.xml") + err = writeFile("build/TEST-go-integration.out", results.Output, 0644) + if err != nil { + return 0, fmt.Errorf("error writing test out file: %w", err) + } + err = writeFile("build/TEST-go-integration.out.json", results.JSONOutput, 0644) + if err != nil { + return 0, fmt.Errorf("error writing test out json file: %w", err) + } + err = writeFile("build/TEST-go-integration.xml", results.XMLOutput, 0644) + if err != nil { + return 0, fmt.Errorf("error writing test out xml file: %w", err) + } + if results.Failures > 0 { + r.Logger().Logf("Testing completed (%d failures, %d successful)", results.Failures, results.Tests-results.Failures) + } else { + r.Logger().Logf("Testing completed (%d successful)", results.Tests) + } + r.Logger().Logf("Console output written here: build/TEST-go-integration.out") + r.Logger().Logf("Console JSON output written here: build/TEST-go-integration.out.json") + r.Logger().Logf("JUnit XML written here: build/TEST-go-integration.xml") + r.Logger().Logf("Diagnostic output (if present) here: build/diagnostics") + return results.Failures, nil +} + +// Clean cleans up the integration testing leftovers +func (Integration) Clean() error { + fmt.Println("--- Clean mage artifacts") + _ = os.RemoveAll(".agent-testing") + + // Clean out .integration-cache/.ogc-cache always + defer os.RemoveAll(".integration-cache") + defer os.RemoveAll(".ogc-cache") + + _, err := os.Stat(".integration-cache") + if err == nil { + // .integration-cache exists; need to run `Clean` from the runner + r, err := createTestRunner(false, "", "") + if err != nil { + return fmt.Errorf("error creating test runner: %w", err) + } + err = r.Clean() + if err != nil { + return fmt.Errorf("error running clean: %w", err) + } + } + + return nil +} + +func createTestRunner(matrix bool, singleTest string, goTestFlags string, batches ...define.Batch) (*runner.Runner, error) { + goVersion, err := mage.DefaultBeatBuildVariableSources.GetGoVersion() + if err != nil { + return nil, err + } + + agentVersion, agentStackVersion, err := getTestRunnerVersions() + if err != nil { + return nil, err + } + + agentBuildDir := os.Getenv("AGENT_BUILD_DIR") + if agentBuildDir == "" { + agentBuildDir = filepath.Join("build", "distributions") + } + essToken, ok, err := ess.GetESSAPIKey() + if err != nil { + return nil, err + } + if !ok { + return nil, fmt.Errorf("ESS api key missing; run 'mage integration:auth'") + } + + // Possible to change the region for deployment, default is gcp-us-west2 which is + // the CFT region. 
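+	// e.g. TEST_INTEG_AUTH_ESS_REGION=aws-eu-central-1 (illustrative region name)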
+	essRegion := os.Getenv("TEST_INTEG_AUTH_ESS_REGION")
+	if essRegion == "" {
+		essRegion = "gcp-us-west2"
+	}
+
+	serviceTokenPath, ok, err := getGCEServiceTokenPath()
+	if err != nil {
+		return nil, err
+	}
+	if !ok {
+		return nil, fmt.Errorf("GCE service token missing; run 'mage integration:auth'")
+	}
+	datacenter := os.Getenv("TEST_INTEG_AUTH_GCP_DATACENTER")
+	if datacenter == "" {
+		// us-central1-a is used because T2A instances required for ARM64 testing are only
+		// available in the central regions
+		datacenter = "us-central1-a"
+	}
+
+	ogcCfg := ogc.Config{
+		ServiceTokenPath: serviceTokenPath,
+		Datacenter:       datacenter,
+	}
+
+	var instanceProvisioner tcommon.InstanceProvisioner
+	instanceProvisionerMode := os.Getenv("INSTANCE_PROVISIONER")
+	switch instanceProvisionerMode {
+	case "", ogc.Name:
+		instanceProvisionerMode = ogc.Name
+		instanceProvisioner, err = ogc.NewProvisioner(ogcCfg)
+	case multipass.Name:
+		instanceProvisioner = multipass.NewProvisioner()
+	case kind.Name:
+		instanceProvisioner = kind.NewProvisioner()
+	default:
+		return nil, fmt.Errorf("INSTANCE_PROVISIONER environment variable must be one of 'ogc', 'multipass', or 'kind', not %s", instanceProvisionerMode)
+	}
+
+	email, err := ogcCfg.ClientEmail()
+	if err != nil {
+		return nil, err
+	}
+
+	provisionCfg := ess.ProvisionerConfig{
+		Identifier: fmt.Sprintf("at-%s", strings.Replace(strings.Split(email, "@")[0], ".", "-", -1)),
+		APIKey:     essToken,
+		Region:     essRegion,
+	}
+
+	var stackProvisioner tcommon.StackProvisioner
+	stackProvisionerMode := os.Getenv("STACK_PROVISIONER")
+	switch stackProvisionerMode {
+	case "", ess.ProvisionerStateful:
+		stackProvisionerMode = ess.ProvisionerStateful
+		stackProvisioner, err = ess.NewProvisioner(provisionCfg)
+		if err != nil {
+			return nil, err
+		}
+	case ess.ProvisionerServerless:
+		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
+		defer cancel()
+		stackProvisioner, err = ess.NewServerlessProvisioner(ctx, provisionCfg)
+		if err != nil {
+			return nil, err
+		}
+	default:
+		return nil, fmt.Errorf("STACK_PROVISIONER environment variable must be one of %q or %q, not %s",
+			ess.ProvisionerStateful,
+			ess.ProvisionerServerless,
+			stackProvisionerMode)
+	}
+
+	timestamp := timestampEnabled()
+
+	extraEnv := map[string]string{}
+	if agentCollectDiag := os.Getenv("AGENT_COLLECT_DIAG"); agentCollectDiag != "" {
+		extraEnv["AGENT_COLLECT_DIAG"] = agentCollectDiag
+	}
+	if agentKeepInstalled := os.Getenv("AGENT_KEEP_INSTALLED"); agentKeepInstalled != "" {
+		extraEnv["AGENT_KEEP_INSTALLED"] = agentKeepInstalled
+	}
+
+	extraEnv["TEST_LONG_RUNNING"] = os.Getenv("TEST_LONG_RUNNING")
+	extraEnv["LONG_TEST_RUNTIME"] = os.Getenv("LONG_TEST_RUNTIME")
+
+	// the following two env vars are currently not used by anything, but can be used in the future to test beats or
+	// other binaries, see https://github.com/elastic/elastic-agent/pull/3258
+	binaryName := os.Getenv("TEST_BINARY_NAME")
+	if binaryName == "" {
+		binaryName = "elastic-agent"
+	}
+
+	repoDir := os.Getenv("TEST_INTEG_REPO_PATH")
+	if repoDir == "" {
+		repoDir = "."
+ } + + diagDir := filepath.Join("build", "diagnostics") + _ = os.MkdirAll(diagDir, 0755) + + cfg := tcommon.Config{ + AgentVersion: agentVersion, + StackVersion: agentStackVersion, + BuildDir: agentBuildDir, + GOVersion: goVersion, + RepoDir: repoDir, + DiagnosticsDir: diagDir, + StateDir: ".integration-cache", + Platforms: testPlatforms(), + Packages: testPackages(), + Groups: testGroups(), + Matrix: matrix, + SingleTest: singleTest, + VerboseMode: mg.Verbose(), + Timestamp: timestamp, + TestFlags: goTestFlags, + ExtraEnv: extraEnv, + BinaryName: binaryName, + } + + r, err := runner.NewRunner(cfg, instanceProvisioner, stackProvisioner, batches...) + if err != nil { + return nil, fmt.Errorf("failed to create runner: %w", err) + } + return r, nil +} + +func writeFile(name string, data []byte, perm os.FileMode) error { + err := os.WriteFile(name, data, perm) + if err != nil { + return fmt.Errorf("failed to write file %s: %w", name, err) + } + return nil +} + +func getTestRunnerVersions() (string, string, error) { + var err error + agentStackVersion := os.Getenv("AGENT_STACK_VERSION") + agentVersion := os.Getenv("AGENT_VERSION") + if agentVersion == "" { + agentVersion, err = mage.DefaultBeatBuildVariableSources.GetBeatVersion() + if err != nil { + return "", "", err + } + if agentStackVersion == "" { + // always use snapshot for stack version + agentStackVersion = fmt.Sprintf("%s-SNAPSHOT", agentVersion) + } + if hasSnapshotEnv() { + // in the case that SNAPSHOT=true is set in the environment the + // default version of the agent is used, but as a snapshot build + agentVersion = fmt.Sprintf("%s-SNAPSHOT", agentVersion) + } + } + + if agentStackVersion == "" { + agentStackVersion = agentVersion + } + + return agentVersion, agentStackVersion, nil +} + +func hasSnapshotEnv() bool { + snapshot := os.Getenv(snapshotEnv) + if snapshot == "" { + return false + } + b, _ := strconv.ParseBool(snapshot) + + return b +} + +func getGCEServiceTokenPath() (string, bool, error) { + serviceTokenPath := os.Getenv("TEST_INTEG_AUTH_GCP_SERVICE_TOKEN_FILE") + if serviceTokenPath == "" { + homeDir, err := os.UserHomeDir() + if err != nil { + return "", false, fmt.Errorf("unable to determine user's home directory: %w", err) + } + serviceTokenPath = filepath.Join(homeDir, ".config", "gcloud", "agent-testing-service-token.json") + } + _, err := os.Stat(serviceTokenPath) + if os.IsNotExist(err) { + return serviceTokenPath, false, nil + } else if err != nil { + return serviceTokenPath, false, fmt.Errorf("unable to check for service account key file at %s: %w", serviceTokenPath, err) + } + return serviceTokenPath, true, nil +} + +func timestampEnabled() bool { + timestamp := os.Getenv("TEST_INTEG_TIMESTAMP") + if timestamp == "" { + return false + } + b, _ := strconv.ParseBool(timestamp) + return b +} + +func testPlatforms() []string { + platformsStr := os.Getenv("TEST_PLATFORMS") + if platformsStr == "" { + return nil + } + var platforms []string + for _, p := range strings.Split(platformsStr, " ") { + if p != "" { + platforms = append(platforms, p) + } + } + return platforms +} + +func testPackages() []string { + packagesStr, defined := os.LookupEnv("TEST_PACKAGES") + if !defined { + return nil + } + + var packages []string + for _, p := range strings.Split(packagesStr, ",") { + if p == "tar.gz" { + p = "targz" + } + packages = append(packages, p) + } + + return packages +} + +func testGroups() []string { + groupsStr := os.Getenv("TEST_GROUPS") + if groupsStr == "" { + return nil + } + var groups []string + for _, g 
:= range strings.Split(groupsStr, " ") {
+		if g != "" {
+			groups = append(groups, g)
+		}
+	}
+	return groups
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/common/batch.go b/dev-tools/mage/target/srvrlesstest/testing/common/batch.go
new file mode 100644
index 000000000000..54a9929b5483
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/common/batch.go
@@ -0,0 +1,32 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package common
+
+import "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define"
+
+// OSBatch defines the mapping between a SupportedOS and a define.Batch.
+type OSBatch struct {
+	// ID is the unique ID for the batch.
+	ID string
+	// OS provides all the OS information to create an instance.
+	OS SupportedOS
+	// Batch defines the batch of tests to run on this layout.
+	Batch define.Batch
+	// Skip defines if this batch will be skipped because no supported layout exists yet.
+	Skip bool
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/common/build.go b/dev-tools/mage/target/srvrlesstest/testing/common/build.go
new file mode 100644
index 000000000000..e994a0e08bc4
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/common/build.go
@@ -0,0 +1,32 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package common
+
+// Build describes a build and its paths.
+type Build struct {
+	// Version of the Elastic Agent build.
+	Version string
+	// Type is the type of OS this build is for.
+	Type string
+	// Arch is the architecture this build is for.
+	Arch string
+	// Path is the path to the build.
+	Path string
+	// SHA512Path is the path to the SHA512 file.
+	SHA512Path string
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/common/config.go b/dev-tools/mage/target/srvrlesstest/testing/common/config.go
new file mode 100644
index 000000000000..1eb81d05c795
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/common/config.go
@@ -0,0 +1,147 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package common
+
+import (
+	"errors"
+	"fmt"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define"
+	"strings"
+)
+
+// Config provides the configuration for running the runner.
+type Config struct {
+	AgentVersion   string
+	StateDir       string
+	ReleaseVersion string
+	StackVersion   string
+	BuildDir       string
+	GOVersion      string
+	RepoDir        string
+	DiagnosticsDir string
+
+	// Platforms filters the tests to only run on the provided list
+	// of platforms even if the tests support more than what is
+	// defined in this list.
+	Platforms []string
+
+	// Packages filters the tests to only run with the provided list
+	// of package formats even if the tests support more than what is
+	// defined in this list.
+	Packages []string
+
+	// BinaryName is the name of the binary package under test, e.g., elastic-agent, metricbeat, etc.
+	// This is used to copy the .tar.gz to the remote host.
+	BinaryName string
+
+	// Groups filters the tests to only run tests that are part of
+	// the groups defined in this list.
+	Groups []string
+
+	// Matrix enables matrix testing. This expands each test to
+	// run on all supported platforms the runner supports.
+	Matrix bool
+
+	// SingleTest only has the runner run that specific test.
+	SingleTest string
+
+	// VerboseMode passes along a verbose mode flag to tests.
+	VerboseMode bool
+
+	// Timestamp enables timestamps on the console output.
+	Timestamp bool
+
+	// TestFlags contains extra go test flags to be set when running tests.
+	TestFlags string
+
+	// ExtraEnv contains extra environment flags to pass to the runner.
+	ExtraEnv map[string]string
+}
+
+// Validate returns an error if the information is invalid.
+func (c *Config) Validate() error {
+	if c.AgentVersion == "" {
+		return errors.New("field AgentVersion must be set")
+	}
+	if c.StackVersion == "" {
+		return errors.New("field StackVersion must be set")
+	}
+	if c.BuildDir == "" {
+		return errors.New("field BuildDir must be set")
+	}
+	if c.GOVersion == "" {
+		return errors.New("field GOVersion must be set")
+	}
+	if c.RepoDir == "" {
+		return errors.New("field RepoDir must be set")
+	}
+	if c.StateDir == "" {
+		return errors.New("field StateDir must be set")
+	}
+	_, err := c.GetPlatforms()
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+// GetPlatforms returns the defined platforms for the configuration.
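+// Each platform string has the form "type[/arch[/version]]" in the general
+// case, "linux/arch/distro[/version]" for Linux, and
+// "kubernetes/arch/version/docker-variant" for Kubernetes, e.g.
+// "linux/amd64/ubuntu" or "kubernetes/arm64/1.30/wolfi" (illustrative values).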
+func (c *Config) GetPlatforms() ([]define.OS, error) {
+	var each []define.OS
+	for _, platform := range c.Platforms {
+		o, err := parsePlatform(platform)
+		if err != nil {
+			return nil, err
+		}
+		each = append(each, o)
+	}
+	return each, nil
+}
+
+func parsePlatform(platform string) (define.OS, error) {
+	separated := strings.Split(platform, "/")
+	var os define.OS
+	switch len(separated) {
+	case 0:
+		return define.OS{}, fmt.Errorf("failed to parse platform string %q: empty string", platform)
+	case 1:
+		os = define.OS{Type: separated[0]}
+	case 2:
+		os = define.OS{Type: separated[0], Arch: separated[1]}
+	case 3:
+		if separated[0] == define.Linux {
+			os = define.OS{Type: separated[0], Arch: separated[1], Distro: separated[2]}
+		} else {
+			os = define.OS{Type: separated[0], Arch: separated[1], Version: separated[2]}
+		}
+	case 4:
+		if separated[0] == define.Linux {
+			os = define.OS{Type: separated[0], Arch: separated[1], Distro: separated[2], Version: separated[3]}
+		} else if separated[0] == define.Kubernetes {
+			os = define.OS{Type: separated[0], Arch: separated[1], Version: separated[2], DockerVariant: separated[3]}
+		} else {
+			return define.OS{}, fmt.Errorf("failed to parse platform string %q: more than 2 separators", platform)
+		}
+	default:
+		return define.OS{}, fmt.Errorf("failed to parse platform string %q: more than 3 separators", platform)
+	}
+	if err := os.Validate(); err != nil {
+		return define.OS{}, fmt.Errorf("failed to parse platform string %q: %w", platform, err)
+	}
+	return os, nil
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/common/instance.go b/dev-tools/mage/target/srvrlesstest/testing/common/instance.go
new file mode 100644
index 000000000000..c87bd4c10e4d
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/common/instance.go
@@ -0,0 +1,79 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package common
+
+import (
+	"context"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define"
+)
+
+type ProvisionerType uint32
+
+const (
+	ProvisionerTypeVM ProvisionerType = iota
+	ProvisionerTypeK8SCluster
+)
+
+// Instance represents a provisioned instance.
+type Instance struct {
+	// Provider is the instance provider for the instance.
+	// See INSTANCE_PROVISIONER environment variable for the supported providers.
+	Provider string `yaml:"provider"`
+	// ID is the identifier of the instance.
+	//
+	// This must be the same ID of the OSBatch.
+	ID string `yaml:"id"`
+	// Name is the nice-name of the instance.
+	Name string `yaml:"name"`
+	// Provisioner is the instance provisioner for the instance.
+	// See INSTANCE_PROVISIONER environment variable for the supported provisioners.
+	Provisioner string `yaml:"provisioner"`
+	// IP is the IP address of the instance.
+	IP string `yaml:"ip"`
+	// Username is the username used to SSH to the instance.
+	Username string `yaml:"username"`
+	// RemotePath is the base path used for performing work on the instance.
+	RemotePath string `yaml:"remote_path"`
+	// Internal holds internal information used by the provisioner.
+	// Best to not touch the contents of this, and leave it be for
+	// the provisioner.
+	Internal map[string]interface{} `yaml:"internal"`
+}
+
+// InstanceProvisioner performs the provisioning of instances.
+type InstanceProvisioner interface {
+	// Name returns the name of the instance provisioner.
+	Name() string
+
+	// Type returns the type of the provisioner.
+	Type() ProvisionerType
+
+	// SetLogger sets the logger for it to use.
+	SetLogger(l Logger)
+
+	// Supported reports whether the provisioner supports the given batch.
+	Supported(batch define.OS) bool
+
+	// Provision brings up the machines.
+	//
+	// The provisioner should re-use already prepared instances when possible.
+	Provision(ctx context.Context, cfg Config, batches []OSBatch) ([]Instance, error)
+
+	// Clean cleans up all provisioned resources.
+	Clean(ctx context.Context, cfg Config, instances []Instance) error
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/common/logger.go b/dev-tools/mage/target/srvrlesstest/testing/common/logger.go
new file mode 100644
index 000000000000..3dd641c03075
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/common/logger.go
@@ -0,0 +1,24 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package common
+
+// Logger is a simple logging interface used by each runner type.
+type Logger interface {
+	// Logf logs the message for this runner.
+	Logf(format string, args ...any)
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/common/prefix_output.go b/dev-tools/mage/target/srvrlesstest/testing/common/prefix_output.go
new file mode 100644
index 000000000000..56b0250a8d45
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/common/prefix_output.go
@@ -0,0 +1,74 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package common
+
+import (
+	"bytes"
+	"strings"
+)
+
+// prefixOutput is an `io.Writer` that prefixes each written line with the provided prefix text
+type prefixOutput struct {
+	logger    Logger
+	prefix    string
+	remainder []byte
+}
+
+// NewPrefixOutput creates a prefix output `io.Writer`.
+func NewPrefixOutput(logger Logger, prefix string) *prefixOutput {
+	return &prefixOutput{
+		logger: logger,
+		prefix: prefix,
+	}
+}
+
+func (r *prefixOutput) Write(p []byte) (int, error) {
+	if len(p) == 0 {
+		// nothing to do
+		return 0, nil
+	}
+	offset := 0
+	for {
+		idx := bytes.IndexByte(p[offset:], '\n')
+		if idx < 0 {
+			// no newline found; stash the rest in remainder for the next call
+			r.remainder = append(r.remainder, p[offset:]...)
+			return len(p), nil
+		}
+
+		// prepend any remainder from the previous write to complete the line
+		line := r.remainder
+		r.remainder = nil
+		line = append(line, p[offset:offset+idx]...)
+		offset += idx + 1
+		// drop '\r' from line (needed for Windows)
+		if len(line) > 0 && line[len(line)-1] == '\r' {
+			line = line[0 : len(line)-1]
+		}
+		if len(line) == 0 {
+			// empty line
+			continue
+		}
+		str := strings.TrimSpace(string(line))
+		r.logger.Logf("%s%s", r.prefix, str)
+	}
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/common/runner.go b/dev-tools/mage/target/srvrlesstest/testing/common/runner.go
new file mode 100644
index 000000000000..6ebc48c92b77
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/common/runner.go
@@ -0,0 +1,57 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package common
+
+import (
+	"context"
+
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/ssh"
+)
+
+// OSRunnerPackageResult is the result for each package.
+type OSRunnerPackageResult struct {
+	// Name is the package name.
+	Name string
+	// Output is the raw test output.
+	Output []byte
+	// XMLOutput is the XML Junit output.
+	XMLOutput []byte
+	// JSONOutput is the JSON output.
+	JSONOutput []byte
+}
+
+// OSRunnerResult is the result of the test run provided by an OSRunner.
+type OSRunnerResult struct {
+	// Packages is the results for each package.
+	Packages []OSRunnerPackageResult
+
+	// SudoPackages is the results for each package that needs to run with sudo.
+	SudoPackages []OSRunnerPackageResult
+}
+
+// OSRunner provides an interface to run the tests on the OS.
+type OSRunner interface {
+	// Prepare prepares the runner to actually run on the host.
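+	// (Prepare receives the target arch and Go version so that, for example,
+	// it can set up the Go toolchain on the host; the exact steps are
+	// runner-specific.)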
+	Prepare(ctx context.Context, sshClient ssh.SSHClient, logger Logger, arch string, goVersion string) error
+	// Copy places the required files on the host.
+	Copy(ctx context.Context, sshClient ssh.SSHClient, logger Logger, repoArchive string, builds []Build) error
+	// Run runs the actual tests and provides the result.
+	Run(ctx context.Context, verbose bool, sshClient ssh.SSHClient, logger Logger, agentVersion string, prefix string, batch define.Batch, env map[string]string) (OSRunnerResult, error)
+	// Diagnostics gathers any diagnostics from the host.
+	Diagnostics(ctx context.Context, sshClient ssh.SSHClient, logger Logger, destination string) error
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/common/stack.go b/dev-tools/mage/target/srvrlesstest/testing/common/stack.go
new file mode 100644
index 000000000000..913435bf9471
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/common/stack.go
@@ -0,0 +1,89 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package common
+
+import "context"
+
+// Stack is a created stack.
+type Stack struct {
+	// ID is the identifier of the stack.
+	//
+	// This must be the same ID used for requesting a stack.
+	ID string `yaml:"id"`
+
+	// Provisioner is the stack provisioner. See STACK_PROVISIONER environment
+	// variable for the supported provisioners.
+	Provisioner string `yaml:"provisioner"`
+
+	// Version is the version of the stack.
+	Version string `yaml:"version"`
+
+	// Ready determines if the stack is ready to be used.
+	Ready bool `yaml:"ready"`
+
+	// Elasticsearch is the URL to communicate with Elasticsearch.
+	Elasticsearch string `yaml:"elasticsearch"`
+
+	// Kibana is the URL to communicate with Kibana.
+	Kibana string `yaml:"kibana"`
+
+	// Username is the username.
+	Username string `yaml:"username"`
+
+	// Password is the password.
+	Password string `yaml:"password"`
+
+	// Internal holds internal information used by the provisioner.
+	// Best to not touch the contents of this, and leave it be for
+	// the provisioner.
+	Internal map[string]interface{} `yaml:"internal"`
+}
+
+// Same returns true if other is the same stack as this one.
+// Two stacks are considered the same if their provisioner and ID are the same.
+func (s Stack) Same(other Stack) bool {
+	return s.Provisioner == other.Provisioner &&
+		s.ID == other.ID
+}
+
+// StackRequest is a request for a new stack.
+type StackRequest struct {
+	// ID is the unique ID for the stack.
+	ID string `yaml:"id"`
+
+	// Version is the version of the stack.
+	Version string `yaml:"version"`
+}
+
+// StackProvisioner performs the provisioning of stacks.
+type StackProvisioner interface {
+	// Name returns the name of the stack provisioner.
+	Name() string
+
+	// SetLogger sets the logger for it to use.
+	SetLogger(l Logger)
+
+	// Create creates a stack.
+	Create(ctx context.Context, request StackRequest) (Stack, error)
+
+	// WaitForReady should block until the stack is ready or the context is cancelled.
+	WaitForReady(ctx context.Context, stack Stack) (Stack, error)
+
+	// Delete deletes the stack.
+	Delete(ctx context.Context, stack Stack) error
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/common/supported.go b/dev-tools/mage/target/srvrlesstest/testing/common/supported.go
new file mode 100644
index 000000000000..130ddef9ec7d
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/common/supported.go
@@ -0,0 +1,28 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package common
+
+import "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define"
+
+// SupportedOS maps an OS definition to an OSRunner.
+type SupportedOS struct {
+	define.OS
+
+	// Runner is the runner to use for the OS.
+	Runner OSRunner
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/define/batch.go b/dev-tools/mage/target/srvrlesstest/testing/define/batch.go
new file mode 100644
index 000000000000..c8b6ac65a0e0
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/define/batch.go
@@ -0,0 +1,320 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package define
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"os/exec"
+	"path/filepath"
+	"strings"
+)
+
+// defaultOS is the set of OSes used when a requirement doesn't define any
+var defaultOS = []OS{
+	{
+		Type: Darwin,
+		Arch: AMD64,
+	},
+	{
+		Type: Darwin,
+		Arch: ARM64,
+	},
+	{
+		Type: Linux,
+		Arch: AMD64,
+	},
+	{
+		Type: Linux,
+		Arch: ARM64,
+	},
+	{
+		Type: Windows,
+		Arch: AMD64,
+	},
+}
+
+// Batch is a grouping of tests that all have the same requirements.
+type Batch struct {
+	// Group must be set on each test to define which group the test belongs to.
+	// Tests that are in the same group are executed on the same runner.
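+	// (This mirrors Requirements.Group in requirements.go; batching keys
+	// batches off the declared group.)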
+	Group string `json:"group"`
+
+	// OS defines the operating systems this test batch needs.
+	OS OS `json:"os"`
+
+	// Stack defines the stack required for this batch.
+	Stack *Stack `json:"stack,omitempty"`
+
+	// Tests define the set of packages and tests that do not require sudo
+	// privileges to be performed.
+	Tests []BatchPackageTests `json:"tests"`
+
+	// SudoTests define the set of packages and tests that do require sudo
+	// privileges to be performed.
+	SudoTests []BatchPackageTests `json:"sudo_tests"`
+}
+
+// BatchPackageTests is a package and its tests that belong to a batch.
+type BatchPackageTests struct {
+	// Name is the package name.
+	Name string `json:"name"`
+	// Tests is the set of tests in the package.
+	Tests []BatchPackageTest `json:"tests"`
+}
+
+// BatchPackageTest is a specific test in a package.
+type BatchPackageTest struct {
+	// Name of the test.
+	Name string `json:"name"`
+	// Stack needed for test.
+	Stack bool `json:"stack"`
+}
+
+// DetermineBatches parses the package directory with the possible extra build
+// tags to determine the set of batches for the package.
+func DetermineBatches(dir string, testFlags string, buildTags ...string) ([]Batch, error) {
+	const (
+		defineMatcher = "define skip; requirements: "
+	)
+
+	// the 'define' build tag is added so that the `define.Require` skips and
+	// logs the requirements for each test.
+	buildTags = append(buildTags, "define")
+
+	// 'go test' wants a directory path to either be absolute or start with
+	// './' so it knows it's a directory and not a package.
+	if !filepath.IsAbs(dir) && !strings.HasPrefix(dir, "./") {
+		dir = "./" + dir
+	}
+
+	// run 'go test' and collect the JSON output to be parsed
+	// #nosec G204 -- test function code, it will be okay
+	cmdArgs := []string{"test", "-v", "--tags", strings.Join(buildTags, ","), "-json"}
+	if testFlags != "" {
+		flags := strings.Split(testFlags, " ")
+		cmdArgs = append(cmdArgs, flags...)
+	}
+
+	cmdArgs = append(cmdArgs, dir)
+	testCmd := exec.Command("go", cmdArgs...)
+	output, err := testCmd.Output()
+	if err != nil {
+		// format cmdArgs to make the error message more coherent
+		cmdArgs = append([]string{"go"}, cmdArgs...)
+
+		var errExit *exec.ExitError
+		if errors.As(err, &errExit) {
+			b := bytes.NewBuffer(errExit.Stderr)
+			b.Write(output)
+			output = b.Bytes()
+		}
+		return nil, fmt.Errorf(
+			"error running go test: (%w), got:\n\n%s\ntried to run: %v",
+			err, string(output), cmdArgs)
+	}
+
+	// parse each test and determine the batches that each test belongs in
+	var batches []Batch
+	sc := bufio.NewScanner(bytes.NewReader(output))
+	for sc.Scan() {
+		var tar testActionResult
+		err := json.Unmarshal([]byte(sc.Text()), &tar)
+		if err != nil {
+			return nil, err
+		}
+		if tar.Action == "output" && strings.Contains(tar.Output, defineMatcher) {
+			reqRaw := tar.Output[strings.Index(tar.Output, defineMatcher)+len(defineMatcher) : strings.LastIndex(tar.Output, "\n")]
+			var req Requirements
+			err := json.Unmarshal([]byte(reqRaw), &req)
+			if err != nil {
+				return nil, fmt.Errorf("failed to parse requirements JSON from test %s/%s: %w", tar.Package, tar.Test, err)
+			}
+			err = req.Validate()
+			if err != nil {
+				return nil, fmt.Errorf("parsed requirements are invalid for test %s/%s: %w", tar.Package, tar.Test, err)
+			}
+			batches = appendTest(batches, tar, req)
+		}
+	}
+	return batches, nil
+}
+
+func appendTest(batches []Batch, tar testActionResult, req Requirements) []Batch {
+	var set []OS
+	for _, o := range req.OS {
+		if o.Arch == "" {
+			set = append(set, OS{
+				Type:          o.Type,
+				Arch:          AMD64,
+				Version:       o.Version,
+				Distro:        o.Distro,
+				DockerVariant: o.DockerVariant,
+			})
+			if o.Type != Windows {
+				set = append(set, OS{
+					Type:          o.Type,
+					Arch:          ARM64,
+					Version:       o.Version,
+					Distro:        o.Distro,
+					DockerVariant: o.DockerVariant,
+				})
+			}
+		} else {
+			set = append(set, OS{
+				Type:          o.Type,
+				Arch:          o.Arch,
+				Version:       o.Version,
+				Distro:        o.Distro,
+				DockerVariant: o.DockerVariant,
+			})
+		}
+	}
+	if len(set) == 0 {
+		// no OS defined; means the test supports all
+		set = defaultOS
+	}
+	for _, o := range set {
+		var batch Batch
+		batchIdx := findBatchIdx(batches, req.Group, o, req.Stack)
+		if batchIdx == -1 {
+			// new batch required
+			batch = Batch{
+				Group:     req.Group,
+				OS:        o,
+				Tests:     nil,
+				SudoTests: nil,
+			}
+			batches = append(batches, batch)
+			batchIdx = len(batches) - 1
+		}
+		batch = batches[batchIdx]
+		if o.Distro != "" {
+			batch.OS.Distro = o.Distro
+		}
+		if o.Version != "" {
+			batch.OS.Version = o.Version
+		}
+		if o.DockerVariant != "" {
+			batch.OS.DockerVariant = o.DockerVariant
+		}
+		if req.Stack != nil && batch.Stack == nil {
+			// assign the stack to this batch
+			batch.Stack = copyStack(req.Stack)
+		}
+		if req.Sudo {
+			batch.SudoTests = appendPackageTest(batch.SudoTests, tar.Package, tar.Test, req.Stack != nil)
+		} else {
+			batch.Tests = appendPackageTest(batch.Tests, tar.Package, tar.Test, req.Stack != nil)
+		}
+		batches[batchIdx] = batch
+	}
+	return batches
+}
+
+func appendPackageTest(tests []BatchPackageTests, pkg string, name string, stack bool) []BatchPackageTests {
+	for i, pt := range tests {
+		if pt.Name == pkg {
+			for _, testName := range pt.Tests {
+				if testName.Name == name {
+					// we already selected this test for this package for this batch,
+					// we can return immediately
+					return tests
+				}
+			}
+			pt.Tests = append(pt.Tests, BatchPackageTest{
+				Name:  name,
+				Stack: stack,
+			})
+			tests[i] = pt
+			return tests
+		}
+	}
+	var pt BatchPackageTests
+	pt.Name = pkg
+	pt.Tests = append(pt.Tests, BatchPackageTest{
+		Name:  name,
+		Stack: stack,
+	})
+	tests = append(tests, pt)
+	return tests
+}
+
+func findBatchIdx(batches []Batch, group string, os OS, stack *Stack) int {
+	for i, b := range batches {
+		if b.Group != group {
+			// must be in the same group
+			continue
+		}
+		if b.OS.Type != os.Type || b.OS.Arch != os.Arch {
+			// must be same type and arch; both are always defined at this point
+			continue
+		}
+		if os.Distro != "" {
+			// must have the same distro
+			if b.OS.Distro != "" && b.OS.Distro != os.Distro {
+				continue
+			}
+		}
+		if os.Version != "" {
+			// must have the same version
+			if b.OS.Version != "" && b.OS.Version != os.Version {
+				continue
+			}
+		}
+		if os.DockerVariant != "" {
+			// must be the same docker image
+			if b.OS.DockerVariant != "" && b.OS.DockerVariant != os.DockerVariant {
+				continue
+			}
+		}
+		if stack == nil {
+			// don't care if the batch has a cloud or not
+			return i
+		}
+		if b.Stack == nil {
+			// needs cloud, but batch doesn't have cloud; calling code can set it
+			return i
+		}
+		if b.Stack.Version == stack.Version {
+			// same cloud version; compatible
+			return i
+		}
+	}
+	return -1
+}
+
+func copyStack(stack *Stack) *Stack {
+	var s Stack
+	if stack != nil {
+		s = *stack
+		return &s
+	}
+	return nil
+}
+
+type testActionResult struct {
+	Time    string `json:"Time"`
+	Action  string `json:"Action"`
+	Package string `json:"Package"`
+	Test    string `json:"Test"`
+	Output  string `json:"Output"`
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/define/requirements.go b/dev-tools/mage/target/srvrlesstest/testing/define/requirements.go
new file mode 100644
index 000000000000..a9fac1ed4672
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/define/requirements.go
@@ -0,0 +1,180 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package define
+
+import (
+	"errors"
+	"fmt"
+
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/component"
+)
+
+const (
+	// Default constant can be used as the default group for tests.
+	Default = "default"
+)
+
+const (
+	// Darwin is macOS platform
+	Darwin = component.Darwin
+	// Linux is Linux platform
+	Linux = component.Linux
+	// Windows is Windows platform
+	Windows = component.Windows
+	// Kubernetes is Kubernetes platform
+	Kubernetes = "kubernetes"
+)
+
+const (
+	// AMD64 is amd64 architecture
+	AMD64 = component.AMD64
+	// ARM64 is arm64 architecture
+	ARM64 = component.ARM64
+)
+
+// OS defines an operating system, architecture, version and distribution combination.
+type OS struct {
+	// Type is the operating system type (darwin, linux, windows, or kubernetes).
+	//
+	// This is always required to be defined on the OS structure.
+	// If it is not defined the test runner will error.
+	Type string `json:"type"`
+	// Arch is the architecture type (amd64 or arm64).
+	//
+	// In the case that it's not provided the test will run on every
+	// architecture that is supported.
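+	// (During batching an unset Arch expands to amd64 plus arm64, except on
+	// Windows where only amd64 is generated; see appendTest in batch.go.)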
+	Arch string `json:"arch"`
+	// Version is a specific version of the OS type to run this test on
+	//
+	// When defined the test runs on this specific version only. When not
+	// defined the test is run on a selected version for this operating system.
+	Version string `json:"version"`
+	// Distro selects a specific Linux distribution to run on (for example,
+	// "ubuntu"), or in the Kubernetes case a specific Kubernetes
+	// distribution (for example, "kind").
+	Distro string `json:"distro"`
+	// DockerVariant selects, in the Kubernetes case, a specific Docker image
+	// variant to run with (for example, "wolfi").
+	DockerVariant string `json:"docker_variant"`
+}
+
+// Validate returns an error if not valid.
+func (o OS) Validate() error {
+	if o.Type == "" {
+		return errors.New("type must be defined")
+	}
+	if o.Type != Darwin && o.Type != Linux && o.Type != Windows && o.Type != Kubernetes {
+		return errors.New("type must be either darwin, linux, windows, or kubernetes")
+	}
+	if o.Arch != "" {
+		if o.Arch != AMD64 && o.Arch != ARM64 {
+			return errors.New("arch must be either amd64 or arm64")
+		}
+		if o.Type == Windows && o.Arch == ARM64 {
+			return errors.New("windows on arm64 not supported")
+		}
+	}
+	if o.Distro != "" && (o.Type != Linux && o.Type != Kubernetes) {
+		return errors.New("distro can only be set when type is linux or kubernetes")
+	}
+	if o.DockerVariant != "" && o.Type != Kubernetes {
+		return errors.New("docker variant can only be set when type is kubernetes")
+	}
+	return nil
+}
+
+// Stack defines the stack required for the test.
+type Stack struct {
+	// Version defines a specific stack version to create for this test.
+	//
+	// If no version is provided, the version being used for the current test
+	// execution is used.
+	Version string `json:"version"`
+}
+
+// Requirements defines the testing requirements for the test to run.
+type Requirements struct {
+	// Group must be set on each test to define which group the test belongs to.
+	// Tests that are in the same group are executed on the same runner.
+	//
+	// Useful when tests take a long time to complete and sharding them across multiple
+	// hosts can improve the total amount of time to complete all the tests.
+	Group string `json:"group"`
+
+	// OS defines the operating systems this test can run on. When multiple
+	// are provided the test is run multiple times, once on each combination.
+	OS []OS `json:"os,omitempty"`
+
+	// Stack defines the stack required for the test.
+	Stack *Stack `json:"stack,omitempty"`
+
+	// Local defines if this test can safely be performed on a local development machine.
+	// If not set then the test will not be performed when local-only testing is performed.
+	//
+	// This doesn't mean this test can only run locally. It will still run on defined OSes
+	// when a full test run is performed.
+	Local bool `json:"local"`
+
+	// Sudo defines that this test must run under superuser permissions. On Mac and Linux the
+	// test gets executed under sudo and on Windows it gets run under Administrator.
+	Sudo bool `json:"sudo"`
+}
+
+// Validate returns an error if not valid.
+func (r Requirements) Validate() error {
+	if r.Group == "" {
+		return errors.New("group is required")
+	}
+	for i, o := range r.OS {
+		if err := o.Validate(); err != nil {
+			return fmt.Errorf("invalid os %d: %w", i, err)
+		}
+	}
+	return nil
+}
+
+// runtimeAllowed returns true if the runtime matches a valid OS.
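+//
+// Unset fields on an OS entry act as wildcards. An illustrative example:
+//
+//	r := Requirements{Group: Default, OS: []OS{{Type: Linux, Arch: ARM64}}}
+//	r.runtimeAllowed("linux", "arm64", "20.04", "ubuntu") // true: version/distro unset
+//	r.runtimeAllowed("linux", "amd64", "20.04", "ubuntu") // false: arch mismatch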
+func (r Requirements) runtimeAllowed(os string, arch string, version string, distro string) bool {
+	if len(r.OS) == 0 {
+		// all allowed
+		return true
+	}
+	for _, o := range r.OS {
+		if o.Type != Kubernetes && o.Type != os {
+			// not valid on this runtime
+			continue
+		}
+		if o.Arch != "" && o.Arch != arch {
+			// not allowed on specific architecture
+			continue
+		}
+		if o.Version != "" && o.Version != version {
+			// not allowed on specific version
+			continue
+		}
+		if o.Distro != "" && o.Distro != distro {
+			// not allowed on specific distro
+			continue
+		}
+		// allowed
+		return true
+	}
+	// made it this far, not allowed
+	return false
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/ess/client.go b/dev-tools/mage/target/srvrlesstest/testing/ess/client.go
new file mode 100644
index 000000000000..6ff144c889b8
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/ess/client.go
@@ -0,0 +1,79 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package ess
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+)
+
+type Client struct {
+	config *Config
+	client *http.Client
+}
+
+func NewClient(config Config) *Client {
+	cfg := defaultConfig()
+	cfg.Merge(config)
+
+	c := new(Client)
+	c.client = http.DefaultClient
+	c.config = cfg
+
+	return c
+}
+
+func (c *Client) doGet(ctx context.Context, relativeUrl string) (*http.Response, error) {
+	u, err := url.JoinPath(c.config.BaseUrl, relativeUrl)
+	if err != nil {
+		return nil, fmt.Errorf("unable to create API URL: %w", err)
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
+	if err != nil {
+		return nil, fmt.Errorf("unable to create GET request: %w", err)
+	}
+
+	req.Header.Set("Authorization", fmt.Sprintf("ApiKey %s", c.config.ApiKey))
+
+	return c.client.Do(req)
+}
+
+func (c *Client) doPost(ctx context.Context, relativeUrl, contentType string, body io.Reader) (*http.Response, error) {
+	u, err := url.JoinPath(c.config.BaseUrl, relativeUrl)
+	if err != nil {
+		return nil, fmt.Errorf("unable to create API URL: %w", err)
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, u, body)
+	if err != nil {
+		return nil, fmt.Errorf("unable to create POST request: %w", err)
+	}
+
+	req.Header.Set("Authorization", fmt.Sprintf("ApiKey %s", c.config.ApiKey))
+	req.Header.Set("Content-Type", contentType)
+
+	return c.client.Do(req)
+}
+
+func (c *Client) BaseURL() string {
+	return c.config.BaseUrl
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/ess/config.go b/dev-tools/mage/target/srvrlesstest/testing/ess/config.go
new file mode 100644
index 000000000000..6f886022e7cb
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/ess/config.go
@@ -0,0 +1,86 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package ess
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+type Config struct {
+	BaseUrl string `json:"base_url" yaml:"base_url"`
+	ApiKey  string `json:"api_key" yaml:"api_key"`
+}
+
+func defaultConfig() *Config {
+	baseURL := os.Getenv("TEST_INTEG_AUTH_ESS_URL")
+	if baseURL == "" {
+		baseURL = "https://cloud.elastic.co"
+	}
+	url := strings.TrimRight(baseURL, "/") + "/api/v1"
+	return &Config{
+		BaseUrl: url,
+	}
+}
+
+// Merge overlays the provided configuration on top of
+// this configuration.
+func (c *Config) Merge(anotherConfig Config) {
+	if anotherConfig.BaseUrl != "" {
+		c.BaseUrl = anotherConfig.BaseUrl
+	}
+
+	if anotherConfig.ApiKey != "" {
+		c.ApiKey = anotherConfig.ApiKey
+	}
+}
+
+// GetESSAPIKey returns the ESS API key, if it exists
+func GetESSAPIKey() (string, bool, error) {
+	essAPIKeyFile, err := GetESSAPIKeyFilePath()
+	if err != nil {
+		return "", false, err
+	}
+	_, err = os.Stat(essAPIKeyFile)
+	if os.IsNotExist(err) {
+		return "", false, nil
+	} else if err != nil {
+		return "", false, fmt.Errorf("unable to check if ESS API key file exists: %w", err)
+	}
+	data, err := os.ReadFile(essAPIKeyFile)
+	if err != nil {
+		return "", true, fmt.Errorf("unable to read ESS API key: %w", err)
+	}
+	essAPIKey := strings.TrimSpace(string(data))
+	return essAPIKey, true, nil
+}
+
+// GetESSAPIKeyFilePath returns the path to the ESS API key file
+func GetESSAPIKeyFilePath() (string, error) {
+	essAPIKeyFile := os.Getenv("TEST_INTEG_AUTH_ESS_APIKEY_FILE")
+	if essAPIKeyFile == "" {
+		homeDir, err := os.UserHomeDir()
+		if err != nil {
+			return "", fmt.Errorf("unable to determine user's home directory: %w", err)
+		}
+		essAPIKeyFile = filepath.Join(homeDir, ".config", "ess", "api_key.txt")
+	}
+	return essAPIKeyFile, nil
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/ess/create_deployment_csp_configuration.yaml b/dev-tools/mage/target/srvrlesstest/testing/ess/create_deployment_csp_configuration.yaml
new file mode 100644
index 000000000000..199f664a65a6
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/ess/create_deployment_csp_configuration.yaml
@@ -0,0 +1,15 @@
+gcp:
+  integrations_server_conf_id: "gcp.integrationsserver.n2.68x32x45.2"
+  elasticsearch_conf_id: "gcp.es.datahot.n2.68x10x45"
+  elasticsearch_deployment_template_id: "gcp-storage-optimized-v5"
+  kibana_instance_configuration_id: "gcp.kibana.n2.68x32x45"
+azure:
+  integrations_server_conf_id: "azure.integrationsserver.fsv2.2"
+  elasticsearch_conf_id: "azure.es.datahot.edsv4"
+  elasticsearch_deployment_template_id: "azure-storage-optimized-v2"
+  kibana_instance_configuration_id: "azure.kibana.fsv2"
+aws:
+  integrations_server_conf_id: "aws.integrationsserver.c5d.2.1"
+  elasticsearch_conf_id: "aws.es.datahot.i3.1.1"
+  elasticsearch_deployment_template_id: "aws-storage-optimized-v5"
+  kibana_instance_configuration_id: "aws.kibana.c5d.1.1"
\ No newline at end of file
diff --git a/dev-tools/mage/target/srvrlesstest/testing/ess/create_deployment_request.tmpl.json b/dev-tools/mage/target/srvrlesstest/testing/ess/create_deployment_request.tmpl.json
new file mode 100644
index 000000000000..3ef93868708f
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/ess/create_deployment_request.tmpl.json
@@ -0,0 +1,102 @@
+{
+  "resources": {
+    "integrations_server": [
+      {
+        "elasticsearch_cluster_ref_id": "main-elasticsearch",
+        "region": "{{ .request.Region }}",
+        "plan": {
+          "cluster_topology": [
+            {
+              "instance_configuration_id": "{{ .integrations_server_conf_id }}",
+              "zone_count": 1,
+              "size": {
+                "resource": "memory",
+                "value": 1024
+              }
+            }
+          ],
+          "integrations_server": {
+            "version": "{{ .request.Version }}"
+          }
+        },
+        "ref_id": "main-integrations_server"
+      }
+    ],
+    "elasticsearch": [
+      {
+        "region": "{{ .request.Region }}",
+        "settings": {
+          "dedicated_masters_threshold": 6
+        },
+        "plan": {
+          "cluster_topology": [
+            {
+              "zone_count": 1,
+              "elasticsearch": {
+                "node_attributes": {
+                  "data": "hot"
+                }
+              },
+              "instance_configuration_id": "{{.elasticsearch_conf_id}}",
+              "node_roles": [
+                "master",
+                "ingest",
+                "transform",
+                "data_hot",
+                "remote_cluster_client",
+                "data_content"
+              ],
+              "id": "hot_content",
+              "size": {
+                "resource": "memory",
+                "value": 8192
+              }
+            }
+          ],
+          "elasticsearch": {
+            "version": "{{ .request.Version }}",
+            "enabled_built_in_plugins": []
+          },
+          "deployment_template": {
+            "id": "{{ .elasticsearch_deployment_template_id }}"
+          }
+        },
+        "ref_id": "main-elasticsearch"
+      }
+    ],
+    "enterprise_search": [],
+    "kibana": [
+      {
+        "elasticsearch_cluster_ref_id": "main-elasticsearch",
+        "region": "{{ .request.Region }}",
+        "plan": {
+          "cluster_topology": [
+            {
+              "instance_configuration_id": "{{.kibana_instance_configuration_id}}",
+              "zone_count": 1,
+              "size": {
+                "resource": "memory",
+                "value": 1024
+              }
+            }
+          ],
+          "kibana": {
+            "version": "{{ .request.Version }}",
+            "user_settings_json": {
+              "xpack.fleet.enableExperimental": ["agentTamperProtectionEnabled"]
+            }
+          }
+        },
+        "ref_id": "main-kibana"
+      }
+    ]
+  },
+  "settings": {
+    "autoscaling_enabled": false
+  },
+  "name": "{{ .request.Name }}",
+  "metadata": {
+    "system_owned": false,
+    "tags": {{ json .request.Tags }}
+  }
+}
\ No newline at end of file
diff --git a/dev-tools/mage/target/srvrlesstest/testing/ess/deployment.go b/dev-tools/mage/target/srvrlesstest/testing/ess/deployment.go
new file mode 100644
index 000000000000..9170e9d3dcb0
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/ess/deployment.go
@@ -0,0 +1,401 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package ess
+
+import (
+	"bytes"
+	"context"
+	_ "embed"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+	"text/template"
+	"time"
+
+	"gopkg.in/yaml.v2"
+)
+
+type Tag struct {
+	Key   string `json:"key"`
+	Value string `json:"value"`
+}
+
+type CreateDeploymentRequest struct {
+	Name    string `json:"name"`
+	Region  string `json:"region"`
+	Version string `json:"version"`
+	Tags    []Tag  `json:"tags"`
+}
+
+type CreateDeploymentResponse struct {
+	ID string `json:"id"`
+
+	ElasticsearchEndpoint string
+	KibanaEndpoint        string
+
+	Username string
+	Password string
+}
+
+type GetDeploymentResponse struct {
+	Elasticsearch struct {
+		Status     DeploymentStatus
+		ServiceUrl string
+	}
+	Kibana struct {
+		Status     DeploymentStatus
+		ServiceUrl string
+	}
+	IntegrationsServer struct {
+		Status     DeploymentStatus
+		ServiceUrl string
+	}
+}
+
+type DeploymentStatus string
+
+func (d *DeploymentStatus) UnmarshalJSON(data []byte) error {
+	var status string
+	if err := json.Unmarshal(data, &status); err != nil {
+		return err
+	}
+
+	switch status {
+	case string(DeploymentStatusInitializing), string(DeploymentStatusReconfiguring), string(DeploymentStatusStarted):
+		*d = DeploymentStatus(status)
+	default:
+		return fmt.Errorf("unknown status: [%s]", status)
+	}
+
+	return nil
+}
+
+func (d *DeploymentStatus) String() string {
+	return string(*d)
+}
+
+const (
+	DeploymentStatusInitializing  DeploymentStatus = "initializing"
+	DeploymentStatusReconfiguring DeploymentStatus = "reconfiguring"
+	DeploymentStatusStarted       DeploymentStatus = "started"
+)
+
+type DeploymentStatusResponse struct {
+	Overall DeploymentStatus
+
+	Elasticsearch      DeploymentStatus
+	Kibana             DeploymentStatus
+	IntegrationsServer DeploymentStatus
+}
+
+// CreateDeployment creates the deployment with the specified configuration.
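+//
+// It renders the embedded request template for the region's CSP, POSTs it to
+// the ESS "deployments" API, and then fetches the deployment again to resolve
+// the Elasticsearch and Kibana service URLs.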
+func (c *Client) CreateDeployment(ctx context.Context, req CreateDeploymentRequest) (*CreateDeploymentResponse, error) {
+	reqBodyBytes, err := generateCreateDeploymentRequestBody(req)
+	if err != nil {
+		return nil, err
+	}
+
+	createResp, err := c.doPost(
+		ctx,
+		"deployments",
+		"application/json",
+		bytes.NewReader(reqBodyBytes),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("error calling deployment creation API: %w", err)
+	}
+	defer createResp.Body.Close()
+
+	var createRespBody struct {
+		ID        string `json:"id"`
+		Resources []struct {
+			Kind        string `json:"kind"`
+			Credentials struct {
+				Username string `json:"username"`
+				Password string `json:"password"`
+			} `json:"credentials"`
+		} `json:"resources"`
+		Errors []struct {
+			Code    string `json:"code"`
+			Message string `json:"message"`
+		} `json:"errors"`
+	}
+
+	if err := json.NewDecoder(createResp.Body).Decode(&createRespBody); err != nil {
+		return nil, fmt.Errorf("error parsing deployment creation API response: %w", err)
+	}
+
+	if len(createRespBody.Errors) > 0 {
+		return nil, fmt.Errorf("failed to create: (%s) %s", createRespBody.Errors[0].Code, createRespBody.Errors[0].Message)
+	}
+
+	r := CreateDeploymentResponse{
+		ID: createRespBody.ID,
+	}
+
+	for _, resource := range createRespBody.Resources {
+		if resource.Kind == "elasticsearch" {
+			r.Username = resource.Credentials.Username
+			r.Password = resource.Credentials.Password
+			break
+		}
+	}
+
+	// Get Elasticsearch and Kibana endpoint URLs
+	getResp, err := c.getDeployment(ctx, r.ID)
+	if err != nil {
+		return nil, fmt.Errorf("error calling deployment retrieval API: %w", err)
+	}
+	defer getResp.Body.Close()
+
+	var getRespBody struct {
+		Resources struct {
+			Elasticsearch []struct {
+				Info struct {
+					Metadata struct {
+						ServiceUrl string `json:"service_url"`
+					} `json:"metadata"`
+				} `json:"info"`
+			} `json:"elasticsearch"`
+			Kibana []struct {
+				Info struct {
+					Metadata struct {
+						ServiceUrl string `json:"service_url"`
+					} `json:"metadata"`
+				} `json:"info"`
+			} `json:"kibana"`
+		} `json:"resources"`
+	}
+
+	if err := json.NewDecoder(getResp.Body).Decode(&getRespBody); err != nil {
+		return nil, fmt.Errorf("error parsing deployment retrieval API response: %w", err)
+	}
+
+	r.ElasticsearchEndpoint = getRespBody.Resources.Elasticsearch[0].Info.Metadata.ServiceUrl
+	r.KibanaEndpoint = getRespBody.Resources.Kibana[0].Info.Metadata.ServiceUrl
+
+	return &r, nil
+}
+
+// ShutdownDeployment attempts to shut down the ESS deployment with the specified ID.
+func (c *Client) ShutdownDeployment(ctx context.Context, deploymentID string) error {
+	u, err := url.JoinPath("deployments", deploymentID, "_shutdown")
+	if err != nil {
+		return fmt.Errorf("unable to create deployment shutdown API URL: %w", err)
+	}
+
+	res, err := c.doPost(ctx, u, "", nil)
+	if err != nil {
+		return fmt.Errorf("error calling deployment shutdown API: %w", err)
+	}
+	defer res.Body.Close()
+
+	if res.StatusCode != 200 {
+		resBytes, _ := io.ReadAll(res.Body)
+		return fmt.Errorf("got unexpected response code [%d] from deployment shutdown API: %s", res.StatusCode, string(resBytes))
+	}
+
+	return nil
+}
+
+// DeploymentStatus returns the overall status of the deployment as well as statuses of every component.
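+//
+// The overall status is "started" only when Elasticsearch, Kibana, and the
+// integrations server all report "started"; otherwise the first non-started
+// component status is used (see overallStatus).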
+func (c *Client) DeploymentStatus(ctx context.Context, deploymentID string) (*DeploymentStatusResponse, error) {
+	getResp, err := c.getDeployment(ctx, deploymentID)
+	if err != nil {
+		return nil, fmt.Errorf("error calling deployment retrieval API: %w", err)
+	}
+	defer getResp.Body.Close()
+
+	var getRespBody struct {
+		Resources struct {
+			Elasticsearch []struct {
+				Info struct {
+					Status DeploymentStatus `json:"status"`
+				} `json:"info"`
+			} `json:"elasticsearch"`
+			Kibana []struct {
+				Info struct {
+					Status DeploymentStatus `json:"status"`
+				} `json:"info"`
+			} `json:"kibana"`
+			IntegrationsServer []struct {
+				Info struct {
+					Status DeploymentStatus `json:"status"`
+				} `json:"info"`
+			} `json:"integrations_server"`
+		} `json:"resources"`
+	}
+
+	if err := json.NewDecoder(getResp.Body).Decode(&getRespBody); err != nil {
+		return nil, fmt.Errorf("error parsing deployment retrieval API response: %w", err)
+	}
+
+	s := DeploymentStatusResponse{
+		Elasticsearch:      getRespBody.Resources.Elasticsearch[0].Info.Status,
+		Kibana:             getRespBody.Resources.Kibana[0].Info.Status,
+		IntegrationsServer: getRespBody.Resources.IntegrationsServer[0].Info.Status,
+	}
+	s.Overall = overallStatus(s.Elasticsearch, s.Kibana, s.IntegrationsServer)
+
+	return &s, nil
+}
+
+// DeploymentIsReady returns true when the deployment is ready, checking its
+// status every `tick` until the deployment is started or the context is cancelled.
+func (c *Client) DeploymentIsReady(ctx context.Context, deploymentID string, tick time.Duration) (bool, error) {
+	ticker := time.NewTicker(tick)
+	defer ticker.Stop()
+
+	var errs error
+	statusCh := make(chan DeploymentStatus, 1)
+	for {
+		select {
+		case <-ctx.Done():
+			return false, errors.Join(errs, ctx.Err())
+		case <-ticker.C:
+			go func() {
+				statusCtx, statusCancel := context.WithTimeout(ctx, tick)
+				defer statusCancel()
+				status, err := c.DeploymentStatus(statusCtx, deploymentID)
+				if err != nil {
+					errs = errors.Join(errs, err)
+					return
+				}
+				statusCh <- status.Overall
+			}()
+		case status := <-statusCh:
+			if status == DeploymentStatusStarted {
+				return true, nil
+			}
+		}
+	}
+}
+
+func (c *Client) getDeployment(ctx context.Context, deploymentID string) (*http.Response, error) {
+	u, err := url.JoinPath("deployments", deploymentID)
+	if err != nil {
+		return nil, fmt.Errorf("unable to create deployment retrieval API URL: %w", err)
+	}
+
+	return c.doGet(ctx, u)
+}
+
+func overallStatus(statuses ...DeploymentStatus) DeploymentStatus {
+	// The overall status is started if every component's status is started. Otherwise,
+	// we take the non-started components' statuses and pick the first one as the overall
+	// status.
+	statusMap := map[DeploymentStatus]struct{}{}
+	for _, status := range statuses {
+		statusMap[status] = struct{}{}
+	}
+
+	if len(statusMap) == 1 {
+		if _, allStarted := statusMap[DeploymentStatusStarted]; allStarted {
+			return DeploymentStatusStarted
+		}
+	}
+
+	var overallStatus DeploymentStatus
+	for _, status := range statuses {
+		if status != DeploymentStatusStarted {
+			overallStatus = status
+			break
+		}
+	}
+
+	return overallStatus
+}
+
+//go:embed create_deployment_request.tmpl.json
+var createDeploymentRequestTemplate string
+
+//go:embed create_deployment_csp_configuration.yaml
+var cloudProviderSpecificValues []byte
+
+func generateCreateDeploymentRequestBody(req CreateDeploymentRequest) ([]byte, error) {
+	var csp string
+	// Special case: AWS us-east-1 region is just called
+	// us-east-1 (instead of aws-us-east-1)!
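+	// All other regions are expected to carry the CSP as their first
+	// dash-separated token (e.g. "gcp-us-central1" or "azure-eastus"),
+	// which is what the parsing below relies on.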
+	if req.Region == "us-east-1" {
+		csp = "aws"
+	} else {
+		regionParts := strings.Split(req.Region, "-")
+		if len(regionParts) < 2 {
+			return nil, fmt.Errorf("unable to parse CSP out of region [%s]", req.Region)
+		}
+
+		csp = regionParts[0]
+	}
+	templateContext, err := createDeploymentTemplateContext(csp, req)
+	if err != nil {
+		return nil, fmt.Errorf("creating request template context: %w", err)
+	}
+
+	tpl, err := template.New("create_deployment_request").
+		Funcs(template.FuncMap{"json": jsonMarshal}).
+		Parse(createDeploymentRequestTemplate)
+	if err != nil {
+		return nil, fmt.Errorf("unable to parse deployment creation template: %w", err)
+	}
+
+	var bBuf bytes.Buffer
+	err = tpl.Execute(&bBuf, templateContext)
+	if err != nil {
+		return nil, fmt.Errorf("rendering create deployment request template with context %v : %w", templateContext, err)
+	}
+	return bBuf.Bytes(), nil
+}
+
+func jsonMarshal(in any) (string, error) {
+	jsonBytes, err := json.Marshal(in)
+	if err != nil {
+		return "", err
+	}
+
+	return string(jsonBytes), nil
+}
+
+func createDeploymentTemplateContext(csp string, req CreateDeploymentRequest) (map[string]any, error) {
+	cspSpecificContext, err := loadCspValues(csp)
+	if err != nil {
+		return nil, fmt.Errorf("loading csp-specific values for %q: %w", csp, err)
+	}
+
+	cspSpecificContext["request"] = req
+
+	return cspSpecificContext, nil
+}
+
+func loadCspValues(csp string) (map[string]any, error) {
+	var cspValues map[string]map[string]any
+
+	err := yaml.Unmarshal(cloudProviderSpecificValues, &cspValues)
+	if err != nil {
+		return nil, fmt.Errorf("unmarshalling error: %w", err)
+	}
+	values, supportedCSP := cspValues[csp]
+	if !supportedCSP {
+		return nil, fmt.Errorf("csp %s not supported", csp)
+	}
+
+	return values, nil
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/ess/serverless.go b/dev-tools/mage/target/srvrlesstest/testing/ess/serverless.go
new file mode 100644
index 000000000000..96245a39fc77
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/ess/serverless.go
@@ -0,0 +1,331 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package ess
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/common"
+)
+
+var serverlessURL = "https://cloud.elastic.co"
+
+// ServerlessClient is the handler for the serverless ES instance
+type ServerlessClient struct {
+	region      string
+	projectType string
+	api         string
+	proj        Project
+	log         common.Logger
+}
+
+// ServerlessRequest contains the data needed for a new serverless instance
+type ServerlessRequest struct {
+	Name     string `json:"name"`
+	RegionID string `json:"region_id"`
+}
+
+// Project represents a serverless project
+type Project struct {
+	Name   string `json:"name"`
+	ID     string `json:"id"`
+	Type   string `json:"type"`
+	Region string `json:"region_id"`
+
+	Credentials struct {
+		Username string `json:"username"`
+		Password string `json:"password"`
+	} `json:"credentials"`
+
+	Endpoints struct {
+		Elasticsearch string `json:"elasticsearch"`
+		Kibana        string `json:"kibana"`
+		Fleet         string `json:"fleet,omitempty"`
+		APM           string `json:"apm,omitempty"`
+	} `json:"endpoints"`
+}
+
+// CredResetResponse contains the new auth details for a
+// stack credential reset
+type CredResetResponse struct {
+	Password string `json:"password"`
+	Username string `json:"username"`
+}
+
+// NewServerlessClient creates a new instance of the serverless client
+func NewServerlessClient(region, projectType, api string, logger common.Logger) *ServerlessClient {
+	return &ServerlessClient{
+		region:      region,
+		api:         api,
+		projectType: projectType,
+		log:         logger,
+	}
+}
+
+// DeployStack creates a new serverless elastic stack
+func (srv *ServerlessClient) DeployStack(ctx context.Context, req ServerlessRequest) (Project, error) {
+	reqBody, err := json.Marshal(req)
+	if err != nil {
+		return Project{}, fmt.Errorf("error marshaling JSON request: %w", err)
+	}
+	urlPath := fmt.Sprintf("%s/api/v1/serverless/projects/%s", serverlessURL, srv.projectType)
+
+	httpHandler, err := http.NewRequestWithContext(ctx, "POST", urlPath, bytes.NewReader(reqBody))
+	if err != nil {
+		return Project{}, fmt.Errorf("error creating new httpRequest: %w", err)
+	}
+
+	httpHandler.Header.Set("Content-Type", "application/json")
+	httpHandler.Header.Set("Authorization", fmt.Sprintf("ApiKey %s", srv.api))
+
+	resp, err := http.DefaultClient.Do(httpHandler)
+	if err != nil {
+		return Project{}, fmt.Errorf("error performing HTTP request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusCreated {
+		p, _ := io.ReadAll(resp.Body)
+		return Project{}, fmt.Errorf("non-201 status code returned by server: %d, body: %s", resp.StatusCode, string(p))
+	}
+
+	serverlessHandle := Project{}
+	err = json.NewDecoder(resp.Body).Decode(&serverlessHandle)
+	if err != nil {
+		return Project{}, fmt.Errorf("error decoding JSON response: %w", err)
+	}
+	srv.proj = serverlessHandle
+
+	// as of 8/8-ish, the serverless ESS cloud no longer provides credentials on
+	// the first POST request; we must send an additional POST to reset the
+	// credentials
+	updated, err := srv.ResetCredentials(ctx)
+	if err != nil {
+		return serverlessHandle, fmt.Errorf("error resetting credentials: %w", err)
+	}
+	srv.proj.Credentials.Username = updated.Username
+	srv.proj.Credentials.Password = updated.Password
+
+	return serverlessHandle, nil
+}
+
+// DeploymentIsReady returns true when the serverless deployment is healthy and ready
+func (srv *ServerlessClient) DeploymentIsReady(ctx context.Context) (bool, error) {
+	err := srv.WaitForEndpoints(ctx)
+	if err != nil {
+		return false, fmt.Errorf("error waiting for endpoints to become available: %w", err)
+	}
+	srv.log.Logf("Endpoints available: ES: %s Fleet: %s Kibana: %s", srv.proj.Endpoints.Elasticsearch, srv.proj.Endpoints.Fleet, srv.proj.Endpoints.Kibana)
+	err = srv.WaitForElasticsearch(ctx)
+	if err != nil {
+		return false, fmt.Errorf("error waiting for ES to become available: %w", err)
+	}
+	srv.log.Logf("Elasticsearch healthy...")
+	err = srv.WaitForKibana(ctx)
+	if err != nil {
+		return false, fmt.Errorf("error waiting for Kibana to become available: %w", err)
+	}
+	srv.log.Logf("Kibana healthy...")
+
+	return true, nil
+}
+
+// DeleteDeployment deletes the deployment
+func (srv *ServerlessClient) DeleteDeployment(ctx context.Context) error {
+	endpoint := fmt.Sprintf("%s/api/v1/serverless/projects/%s/%s", serverlessURL, srv.proj.Type, srv.proj.ID)
+	req, err := http.NewRequestWithContext(ctx, "DELETE", endpoint, nil)
+	if err != nil {
+		return fmt.Errorf("error creating HTTP request: %w", err)
+	}
+	req.Header.Set("Authorization", fmt.Sprintf("ApiKey %s", srv.api))
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("error performing delete request: %w", err)
+	}
+	// defer the close so the error body can still be read below
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		errBody, _ := io.ReadAll(resp.Body)
+		return fmt.Errorf("unexpected status code %d from %s: %s", resp.StatusCode, req.URL, errBody)
+	}
+	return nil
+}
+
+// WaitForEndpoints polls the API and waits until fleet/ES endpoints are available
+func (srv *ServerlessClient) WaitForEndpoints(ctx context.Context) error {
+	reqURL := fmt.Sprintf("%s/api/v1/serverless/projects/%s/%s", serverlessURL, srv.proj.Type, srv.proj.ID)
+	httpHandler, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
+	if err != nil {
+		return fmt.Errorf("error creating http request: %w", err)
+	}
+
+	httpHandler.Header.Set("Authorization", fmt.Sprintf("ApiKey %s", srv.api))
+
+	readyFunc := func(resp *http.Response) bool {
+		project := &Project{}
+		err = json.NewDecoder(resp.Body).Decode(project)
+		resp.Body.Close()
+		if err != nil {
+			srv.log.Logf("response decoding error: %v", err)
+			return false
+		}
+		if project.Endpoints.Elasticsearch != "" {
+			// fake out the fleet URL, set to ES url
+			if project.Endpoints.Fleet == "" {
+				project.Endpoints.Fleet = strings.Replace(project.Endpoints.Elasticsearch, "es.eks", "fleet.eks", 1)
+			}
+
+			srv.proj.Endpoints = project.Endpoints
+			return true
+		}
+		return false
+	}
+
+	err = srv.waitForRemoteState(ctx, httpHandler, time.Second*5, readyFunc)
+	if err != nil {
+		return fmt.Errorf("error waiting for remote instance to start: %w", err)
+	}
+
+	return nil
+}
+
+// WaitForElasticsearch waits until the ES endpoint is healthy
+func (srv *ServerlessClient) WaitForElasticsearch(ctx context.Context) error {
+	req, err := http.NewRequestWithContext(ctx, "GET", srv.proj.Endpoints.Elasticsearch, nil)
+	if err != nil {
+		return fmt.Errorf("error creating HTTP request: %w", err)
+	}
+	req.SetBasicAuth(srv.proj.Credentials.Username, srv.proj.Credentials.Password)
+
+	// _cluster/health no longer works on serverless, just check response code
+	readyFunc := func(resp *http.Response) bool {
+		return resp.StatusCode == 200
+	}
+
+	err = srv.waitForRemoteState(ctx, req, time.Second*5, readyFunc)
+	if err != nil {
+		return fmt.Errorf("error waiting for ES to become healthy: %w", err)
+	}
+	return nil
+}
+
+// WaitForKibana waits until the kibana endpoint is healthy
+func (srv *ServerlessClient) WaitForKibana(ctx context.Context) error {
+	req, err := http.NewRequestWithContext(ctx, "GET", srv.proj.Endpoints.Kibana+"/api/status", nil)
+	if err != nil {
+		return fmt.Errorf("error creating HTTP request: %w", err)
+	}
+	req.SetBasicAuth(srv.proj.Credentials.Username, srv.proj.Credentials.Password)
+
+	readyFunc := func(resp *http.Response) bool {
+		var status struct {
+			Status struct {
+				Overall struct {
+					Level string `json:"level"`
+				} `json:"overall"`
+			} `json:"status"`
+		}
+		err = json.NewDecoder(resp.Body).Decode(&status)
+		resp.Body.Close()
+		if err != nil {
+			srv.log.Logf("response decoding error: %v", err)
+			return false
+		}
+		return status.Status.Overall.Level == "available"
+	}
+
+	err = srv.waitForRemoteState(ctx, req, time.Second*5, readyFunc)
+	if err != nil {
+		return fmt.Errorf("error waiting for Kibana to become healthy: %w", err)
+	}
+	return nil
+}
+
+// ResetCredentials resets the credentials for the given ESS instance
+func (srv *ServerlessClient) ResetCredentials(ctx context.Context) (CredResetResponse, error) {
+	resetURL := fmt.Sprintf("%s/api/v1/serverless/projects/%s/%s/_reset-internal-credentials", serverlessURL, srv.projectType, srv.proj.ID)
+
+	resetHandler, err := http.NewRequestWithContext(ctx, "POST", resetURL, nil)
+	if err != nil {
+		return CredResetResponse{}, fmt.Errorf("error creating new httpRequest: %w", err)
+	}
+
+	resetHandler.Header.Set("Content-Type", "application/json")
+	resetHandler.Header.Set("Authorization", fmt.Sprintf("ApiKey %s", srv.api))
+
+	resp, err := http.DefaultClient.Do(resetHandler)
+	if err != nil {
+		return CredResetResponse{}, fmt.Errorf("error performing HTTP request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		p, _ := io.ReadAll(resp.Body)
+		return CredResetResponse{}, fmt.Errorf("non-200 status code returned by server: %d, body: %s", resp.StatusCode, string(p))
+	}
+
+	updated := CredResetResponse{}
+	err = json.NewDecoder(resp.Body).Decode(&updated)
+	if err != nil {
+		return CredResetResponse{}, fmt.Errorf("error decoding JSON response: %w", err)
+	}
+
+	return updated, nil
+}
+
+func (srv *ServerlessClient) waitForRemoteState(ctx context.Context, httpHandler *http.Request, tick time.Duration, isReady func(*http.Response) bool) error {
+	timer := time.NewTimer(time.Millisecond)
+	// in cases where we get a timeout, also return the last error returned via HTTP
+	var lastErr error
+	for {
+		select {
+		case <-ctx.Done():
+			return fmt.Errorf("got context done; last HTTP error: %w", lastErr)
+		case <-timer.C:
+		}
+
+		resp, err := http.DefaultClient.Do(httpHandler)
+		if err != nil {
+			errMsg := fmt.Errorf("request error: %w", err)
+			// The Logger interface doesn't have a debug level and we don't want
+			// to auto-log these, as most of the time it's just spam.
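+			// (the commented-out log calls below are kept for ad-hoc debugging)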
+			//srv.log.Logf(errMsg.Error())
+			lastErr = errMsg
+			timer.Reset(time.Second * 5)
+			continue
+		}
+		if resp.StatusCode != http.StatusOK {
+			errBody, _ := io.ReadAll(resp.Body)
+			errMsg := fmt.Errorf("unexpected status code %d in request to %s, body: %s", resp.StatusCode, httpHandler.URL.String(), string(errBody))
+			//srv.log.Logf(errMsg.Error())
+			lastErr = errMsg
+			resp.Body.Close()
+			timer.Reset(time.Second * 5)
+			continue
+		}
+
+		if isReady(resp) {
+			return nil
+		}
+		timer.Reset(tick)
+	}
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/ess/serverless_provisioner.go b/dev-tools/mage/target/srvrlesstest/testing/ess/serverless_provisioner.go
new file mode 100644
index 000000000000..0ae3280016d8
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/ess/serverless_provisioner.go
@@ -0,0 +1,275 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package ess
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/common"
+	"github.com/elastic/elastic-agent-libs/logp"
+)
+
+const ProvisionerServerless = "serverless"
+
+// ServerlessProvisioner provisions serverless stacks through the serverless ESS API.
+type ServerlessProvisioner struct {
+	cfg ProvisionerConfig
+	log common.Logger
+}
+
+type defaultLogger struct {
+	wrapped *logp.Logger
+}
+
+// Logf implements the common.Logger interface
+func (log *defaultLogger) Logf(format string, args ...any) {
+	if len(args) == 0 {
+		log.wrapped.Info(format)
+	} else {
+		log.wrapped.Infof(format, args...)
+	}
+}
+
+// ServerlessRegions is the JSON response from the serverless regions API endpoint
+type ServerlessRegions struct {
+	CSP       string `json:"csp"`
+	CSPRegion string `json:"csp_region"`
+	ID        string `json:"id"`
+	Name      string `json:"name"`
+}
+
+// NewServerlessProvisioner creates a new StackProvisioner instance for serverless
+func NewServerlessProvisioner(ctx context.Context, cfg ProvisionerConfig) (common.StackProvisioner, error) {
+	prov := &ServerlessProvisioner{
+		cfg: cfg,
+		log: &defaultLogger{wrapped: logp.L()},
+	}
+	err := prov.CheckCloudRegion(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("error checking region setting: %w", err)
+	}
+	return prov, nil
+}
+
+func (prov *ServerlessProvisioner) Name() string {
+	return ProvisionerServerless
+}
+
+// SetLogger sets the logger for the provisioner to use.
+func (prov *ServerlessProvisioner) SetLogger(l common.Logger) {
+	prov.log = l
+}
+
+// Create creates a stack.
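+//
+// For serverless, a "stack" is an observability project: Create deploys the
+// project, waits for its endpoints to be published, and records the
+// deployment ID and type in Stack.Internal for WaitForReady and Delete.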
+func (prov *ServerlessProvisioner) Create(ctx context.Context, request common.StackRequest) (common.Stack, error) { + // allow up to 4 minutes for requests + createCtx, createCancel := context.WithTimeout(ctx, 4*time.Minute) + defer createCancel() + + client := NewServerlessClient(prov.cfg.Region, "observability", prov.cfg.APIKey, prov.log) + srvReq := ServerlessRequest{Name: request.ID, RegionID: prov.cfg.Region} + + prov.log.Logf("Creating serverless stack %s [stack_id: %s]", request.Version, request.ID) + proj, err := client.DeployStack(createCtx, srvReq) + if err != nil { + return common.Stack{}, fmt.Errorf("error deploying stack for request %s: %w", request.ID, err) + } + err = client.WaitForEndpoints(createCtx) + if err != nil { + return common.Stack{}, fmt.Errorf("error waiting for endpoints to become available for serverless stack %s [stack_id: %s, deployment_id: %s]: %w", request.Version, request.ID, proj.ID, err) + } + stack := common.Stack{ + ID: request.ID, + Provisioner: prov.Name(), + Version: request.Version, + Elasticsearch: client.proj.Endpoints.Elasticsearch, + Kibana: client.proj.Endpoints.Kibana, + Username: client.proj.Credentials.Username, + Password: client.proj.Credentials.Password, + Internal: map[string]interface{}{ + "deployment_id": proj.ID, + "deployment_type": proj.Type, + }, + Ready: false, + } + prov.log.Logf("Created serverless stack %s [stack_id: %s, deployment_id: %s]", request.Version, request.ID, proj.ID) + return stack, nil +} + +// WaitForReady should block until the stack is ready or the context is cancelled. +func (prov *ServerlessProvisioner) WaitForReady(ctx context.Context, stack common.Stack) (common.Stack, error) { + deploymentID, deploymentType, err := prov.getDeploymentInfo(stack) + if err != nil { + return stack, fmt.Errorf("failed to get deployment info from the stack: %w", err) + } + + ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) + defer cancel() + + client := NewServerlessClient(prov.cfg.Region, "observability", prov.cfg.APIKey, prov.log) + client.proj.ID = deploymentID + client.proj.Type = deploymentType + client.proj.Region = prov.cfg.Region + client.proj.Endpoints.Elasticsearch = stack.Elasticsearch + client.proj.Endpoints.Kibana = stack.Kibana + client.proj.Credentials.Username = stack.Username + client.proj.Credentials.Password = stack.Password + + prov.log.Logf("Waiting for serverless stack %s to be ready [stack_id: %s, deployment_id: %s]", stack.Version, stack.ID, deploymentID) + + errCh := make(chan error) + var lastErr error + + ticker := time.NewTicker(30 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + if lastErr == nil { + lastErr = ctx.Err() + } + return stack, fmt.Errorf("serverless stack %s [stack_id: %s, deployment_id: %s] never became ready: %w", stack.Version, stack.ID, deploymentID, lastErr) + case <-ticker.C: + go func() { + statusCtx, statusCancel := context.WithTimeout(ctx, 30*time.Second) + defer statusCancel() + ready, err := client.DeploymentIsReady(statusCtx) + if err != nil { + errCh <- err + } else if !ready { + errCh <- fmt.Errorf("serverless stack %s [stack_id: %s, deployment_id: %s] never became ready", stack.Version, stack.ID, deploymentID) + } else { + errCh <- nil + } + }() + case err := <-errCh: + if err == nil { + stack.Ready = true + return stack, nil + } + lastErr = err + } + } +} + +// Delete deletes a stack. 
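+// It expects the stack.Internal map to still carry the "deployment_id" and
+// "deployment_type" values stored by Create; without them the lookup below
+// fails before any API call is made.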
+func (prov *ServerlessProvisioner) Delete(ctx context.Context, stack common.Stack) error {
+ deploymentID, deploymentType, err := prov.getDeploymentInfo(stack)
+ if err != nil {
+ return fmt.Errorf("failed to get deployment info from the stack: %w", err)
+ }
+
+ client := NewServerlessClient(prov.cfg.Region, "observability", prov.cfg.APIKey, prov.log)
+ client.proj.ID = deploymentID
+ client.proj.Type = deploymentType
+ client.proj.Region = prov.cfg.Region
+ client.proj.Endpoints.Elasticsearch = stack.Elasticsearch
+ client.proj.Endpoints.Kibana = stack.Kibana
+ client.proj.Credentials.Username = stack.Username
+ client.proj.Credentials.Password = stack.Password
+
+ prov.log.Logf("Destroying serverless stack %s [stack_id: %s, deployment_id: %s]", stack.Version, stack.ID, deploymentID)
+ err = client.DeleteDeployment(ctx)
+ if err != nil {
+ return fmt.Errorf("error removing serverless stack %s [stack_id: %s, deployment_id: %s]: %w", stack.Version, stack.ID, deploymentID, err)
+ }
+ return nil
+}
+
+// CheckCloudRegion checks that the configured region is valid for serverless;
+// if the region is invalid, it is overwritten with a valid one.
+// The "normal" and serverless ESS APIs support different regions, which is why we need this.
+func (prov *ServerlessProvisioner) CheckCloudRegion(ctx context.Context) error {
+ urlPath := fmt.Sprintf("%s/api/v1/serverless/regions", serverlessURL)
+
+ httpHandler, err := http.NewRequestWithContext(ctx, "GET", urlPath, nil)
+ if err != nil {
+ return fmt.Errorf("error creating new httpRequest: %w", err)
+ }
+
+ httpHandler.Header.Set("Content-Type", "application/json")
+ httpHandler.Header.Set("Authorization", fmt.Sprintf("ApiKey %s", prov.cfg.APIKey))
+
+ resp, err := http.DefaultClient.Do(httpHandler)
+ if err != nil {
+ return fmt.Errorf("error performing HTTP request: %w", err)
+ }
+ defer resp.Body.Close()
+
+ if resp.StatusCode != http.StatusOK {
+ p, _ := io.ReadAll(resp.Body)
+ return fmt.Errorf("non-200 status code returned by server: %d, body: %s", resp.StatusCode, string(p))
+ }
+ regions := []ServerlessRegions{}
+
+ err = json.NewDecoder(resp.Body).Decode(&regions)
+ if err != nil {
+ return fmt.Errorf("error unpacking regions from list: %w", err)
+ }
+
+ found := false
+ for _, region := range regions {
+ if region.ID == prov.cfg.Region {
+ found = true
+ }
+ }
+ if !found {
+ if len(regions) == 0 {
+ return fmt.Errorf("no regions found for serverless API")
+ }
+ newRegion := regions[0].ID
+ prov.log.Logf("WARNING: Region %s is not available for serverless, selecting %s. 
Other regions are:", prov.cfg.Region, newRegion) + for _, avail := range regions { + prov.log.Logf(" %s - %s", avail.ID, avail.Name) + } + prov.cfg.Region = newRegion + } + + return nil +} + +func (prov *ServerlessProvisioner) getDeploymentInfo(stack common.Stack) (string, string, error) { + if stack.Internal == nil { + return "", "", fmt.Errorf("missing internal information") + } + deploymentIDRaw, ok := stack.Internal["deployment_id"] + if !ok { + return "", "", fmt.Errorf("missing internal deployment_id") + } + deploymentID, ok := deploymentIDRaw.(string) + if !ok { + return "", "", fmt.Errorf("internal deployment_id not a string") + } + deploymentTypeRaw, ok := stack.Internal["deployment_type"] + if !ok { + return "", "", fmt.Errorf("missing internal deployment_type") + } + deploymentType, ok := deploymentTypeRaw.(string) + if !ok { + return "", "", fmt.Errorf("internal deployment_type is not a string") + } + return deploymentID, deploymentType, nil +} diff --git a/dev-tools/mage/target/srvrlesstest/testing/ess/statful_provisioner.go b/dev-tools/mage/target/srvrlesstest/testing/ess/statful_provisioner.go new file mode 100644 index 000000000000..0ab6bf8db7d5 --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/testing/ess/statful_provisioner.go @@ -0,0 +1,201 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package ess + +import ( + "context" + "errors" + "fmt" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/common" + "os" + "strings" + "time" +) + +const ProvisionerStateful = "stateful" + +// ProvisionerConfig is the configuration for the ESS statefulProvisioner. +type ProvisionerConfig struct { + Identifier string + APIKey string + Region string +} + +// Validate returns an error if the information is invalid. +func (c *ProvisionerConfig) Validate() error { + if c.Identifier == "" { + return errors.New("field Identifier must be set") + } + if c.APIKey == "" { + return errors.New("field APIKey must be set") + } + if c.Region == "" { + return errors.New("field Region must be set") + } + return nil +} + +type statefulProvisioner struct { + logger common.Logger + cfg ProvisionerConfig + client *Client +} + +// NewProvisioner creates the ESS stateful Provisioner +func NewProvisioner(cfg ProvisionerConfig) (common.StackProvisioner, error) { + err := cfg.Validate() + if err != nil { + return nil, err + } + essClient := NewClient(Config{ + ApiKey: cfg.APIKey, + }) + return &statefulProvisioner{ + cfg: cfg, + client: essClient, + }, nil +} + +func (p *statefulProvisioner) Name() string { + return ProvisionerStateful +} + +func (p *statefulProvisioner) SetLogger(l common.Logger) { + p.logger = l +} + +// Create creates a stack. 
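+//
+// Every deployment is tagged for traceability; when the CI env var is set,
+// Buildkite build metadata is added to those tags as well (see the tag map
+// built below).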
+func (p *statefulProvisioner) Create(ctx context.Context, request common.StackRequest) (common.Stack, error) {
+ // allow up to 2 minutes for request
+ createCtx, createCancel := context.WithTimeout(ctx, 2*time.Minute)
+ defer createCancel()
+ deploymentTags := map[string]string{
+ "division": "engineering",
+ "org": "ingest",
+ "team": "elastic-agent-control-plane",
+ "project": "elastic-agent",
+ "integration-tests": "true",
+ }
+ // If the CI env var is set, this means we are running inside the CI pipeline and some expected env vars are exposed
+ if _, e := os.LookupEnv("CI"); e {
+ deploymentTags["buildkite_id"] = os.Getenv("BUILDKITE_BUILD_NUMBER")
+ deploymentTags["creator"] = os.Getenv("BUILDKITE_BUILD_CREATOR")
+ deploymentTags["buildkite_url"] = os.Getenv("BUILDKITE_BUILD_URL")
+ deploymentTags["ci"] = "true"
+ }
+ resp, err := p.createDeployment(createCtx, request, deploymentTags)
+ if err != nil {
+ return common.Stack{}, err
+ }
+ return common.Stack{
+ ID: request.ID,
+ Provisioner: p.Name(),
+ Version: request.Version,
+ Elasticsearch: resp.ElasticsearchEndpoint,
+ Kibana: resp.KibanaEndpoint,
+ Username: resp.Username,
+ Password: resp.Password,
+ Internal: map[string]interface{}{
+ "deployment_id": resp.ID,
+ },
+ Ready: false,
+ }, nil
+}
+
+// WaitForReady should block until the stack is ready or the context is cancelled.
+func (p *statefulProvisioner) WaitForReady(ctx context.Context, stack common.Stack) (common.Stack, error) {
+ deploymentID, err := p.getDeploymentID(stack)
+ if err != nil {
+ return stack, fmt.Errorf("failed to get deployment ID from the stack: %w", err)
+ }
+ // allow up to 10 minutes for it to become ready
+ ctx, cancel := context.WithTimeout(ctx, 10*time.Minute)
+ defer cancel()
+ p.logger.Logf("Waiting for cloud stack %s to be ready [stack_id: %s, deployment_id: %s]", stack.Version, stack.ID, deploymentID)
+ ready, err := p.client.DeploymentIsReady(ctx, deploymentID, 30*time.Second)
+ if err != nil {
+ return stack, fmt.Errorf("failed to check for cloud %s [stack_id: %s, deployment_id: %s] to be ready: %w", stack.Version, stack.ID, deploymentID, err)
+ }
+ if !ready {
+ return stack, fmt.Errorf("cloud %s [stack_id: %s, deployment_id: %s] never became ready", stack.Version, stack.ID, deploymentID)
+ }
+ stack.Ready = true
+ return stack, nil
+}
+
+// Delete deletes a stack.
+func (p *statefulProvisioner) Delete(ctx context.Context, stack common.Stack) error { + deploymentID, err := p.getDeploymentID(stack) + if err != nil { + return err + } + + // allow up to 1 minute for request + ctx, cancel := context.WithTimeout(ctx, 1*time.Minute) + defer cancel() + + p.logger.Logf("Destroying cloud stack %s [stack_id: %s, deployment_id: %s]", stack.Version, stack.ID, deploymentID) + return p.client.ShutdownDeployment(ctx, deploymentID) +} + +func (p *statefulProvisioner) createDeployment(ctx context.Context, r common.StackRequest, tags map[string]string) (*CreateDeploymentResponse, error) { + ctx, cancel := context.WithTimeout(ctx, 1*time.Minute) + defer cancel() + + p.logger.Logf("Creating cloud stack %s [stack_id: %s]", r.Version, r.ID) + name := fmt.Sprintf("%s-%s", strings.Replace(p.cfg.Identifier, ".", "-", -1), r.ID) + + // prepare tags + tagArray := make([]Tag, 0, len(tags)) + for k, v := range tags { + tagArray = append(tagArray, Tag{ + Key: k, + Value: v, + }) + } + + createDeploymentRequest := CreateDeploymentRequest{ + Name: name, + Region: p.cfg.Region, + Version: r.Version, + Tags: tagArray, + } + + resp, err := p.client.CreateDeployment(ctx, createDeploymentRequest) + if err != nil { + p.logger.Logf("Failed to create ESS cloud %s: %s", r.Version, err) + return nil, fmt.Errorf("failed to create ESS cloud for version %s: %w", r.Version, err) + } + p.logger.Logf("Created cloud stack %s [stack_id: %s, deployment_id: %s]", r.Version, r.ID, resp.ID) + return resp, nil +} + +func (p *statefulProvisioner) getDeploymentID(stack common.Stack) (string, error) { + if stack.Internal == nil { + return "", fmt.Errorf("missing internal information") + } + deploymentIDRaw, ok := stack.Internal["deployment_id"] + if !ok { + return "", fmt.Errorf("missing internal deployment_id") + } + deploymentID, ok := deploymentIDRaw.(string) + if !ok { + return "", fmt.Errorf("internal deployment_id not a string") + } + return deploymentID, nil +} diff --git a/dev-tools/mage/target/srvrlesstest/testing/fetcher.go b/dev-tools/mage/target/srvrlesstest/testing/fetcher.go new file mode 100644 index 000000000000..e9e797c18231 --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/testing/fetcher.go @@ -0,0 +1,256 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package testing + +import ( + "archive/tar" + "archive/zip" + "compress/gzip" + "context" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "sync" +) + +const extAsc = ".asc" +const extHash = ".sha512" + +var ( + // ErrUnsupportedPlatform returned when the operating system and architecture combination is not supported. 
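+ // GetPackageSuffix wraps this sentinel with %w, so callers can test for
+ // it with errors.Is(err, ErrUnsupportedPlatform).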
+ ErrUnsupportedPlatform = errors.New("platform is not supported") +) + +// packageArchMap provides a mapping for the endings of the builds of Elastic Agent based on the +// operating system and architecture. +var packageArchMap = map[string]string{ + "linux-amd64-targz": "linux-x86_64.tar.gz", + "linux-amd64-deb": "amd64.deb", + "linux-amd64-rpm": "x86_64.rpm", + "linux-arm64-targz": "linux-arm64.tar.gz", + "linux-arm64-deb": "arm64.deb", + "linux-arm64-rpm": "aarch64.rpm", + "windows-amd64-zip": "windows-x86_64.zip", + "darwin-amd64-targz": "darwin-x86_64.tar.gz", + "darwin-arm64-targz": "darwin-aarch64.tar.gz", +} + +// GetPackageSuffix returns the suffix ending for the builds of Elastic Agent based on the +// operating system and architecture. +func GetPackageSuffix(operatingSystem string, architecture string, packageFormat string) (string, error) { + suffix, ok := packageArchMap[fmt.Sprintf("%s-%s-%s", operatingSystem, architecture, packageFormat)] + if !ok { + return "", fmt.Errorf("%w: %s/%s/%s", ErrUnsupportedPlatform, operatingSystem, architecture, packageFormat) + } + return suffix, nil +} + +// FetcherResult represents a pending result from the fetcher. +type FetcherResult interface { + // Name is the name of the fetched result. + Name() string + // Fetch performs the actual fetch into the provided directory. + Fetch(ctx context.Context, l Logger, dir string) error +} + +// Fetcher provides a path for fetching the Elastic Agent compressed archive +// to extract and run for the integration test. +type Fetcher interface { + // Name returns a unique name for the fetcher. + // + // This name is used as a caching key and if a build has already been fetched for a version then it will not + // be fetched again as long as the same fetcher is being used. + Name() string + // Fetch fetches the Elastic Agent compressed archive to extract and run for the integration test. + // + // The extraction is handled by the caller. This should only download the file + // and place it into the directory. + Fetch(ctx context.Context, operatingSystem string, architecture string, version string, packageFormat string) (FetcherResult, error) +} + +// fetchCache is global to all tests, reducing the time required to fetch the needed artifacts +// to only be need at the start of the first test. +var fetchCache map[string]*fetcherCache +var fetchCacheMx sync.Mutex + +// fetcherCache provides a caching mechanism for only fetching what has not already been fetched. +type fetcherCache struct { + mx sync.Mutex + dir string +} + +// fetch either uses the cache result or performs a new fetch if the content is missing. 
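+//
+// A sketch of the intended behavior (names are illustrative):
+//
+//	src, err := cache.fetch(ctx, logger, res)
+//	// the first call downloads into cache.dir; later calls for the same
+//	// res.Name() return <cache.dir>/<name> without re-fetching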
+func (c *fetcherCache) fetch(ctx context.Context, l Logger, res FetcherResult) (string, error) { + name := res.Name() + src := filepath.Join(c.dir, name) + _, err := os.Stat(src) + if err == nil || os.IsExist(err) { + l.Logf("Using existing artifact %s", name) + return src, nil + } + err = res.Fetch(ctx, l, c.dir) + if err != nil { + return "", err + } + return src, nil +} + +func splitFileType(name string) (string, string, error) { + if strings.HasSuffix(name, ".tar.gz") { + return strings.TrimSuffix(name, ".tar.gz"), ".tar.gz", nil + } + if strings.HasSuffix(name, ".zip") { + return strings.TrimSuffix(name, ".zip"), ".zip", nil + } + if strings.HasSuffix(name, ".deb") { + return strings.TrimSuffix(name, ".deb"), ".deb", nil + } + if strings.HasSuffix(name, ".rpm") { + return strings.TrimSuffix(name, ".rpm"), ".rpm", nil + } + return "", "", fmt.Errorf("unknown file extension type: %s", filepath.Ext(name)) +} + +// untar takes a .tar.gz and extracts its content +func untar(archivePath string, extractDir string) error { + r, err := os.Open(archivePath) + if err != nil { + return err + } + defer r.Close() + + zr, err := gzip.NewReader(r) + if err != nil { + return err + } + + tr := tar.NewReader(zr) + + for { + f, err := tr.Next() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return err + } + + fi := f.FileInfo() + mode := fi.Mode() + abs := filepath.Join(extractDir, f.Name) //nolint:gosec // used only in tests + switch { + case mode.IsRegular(): + // just to be sure, it should already be created by Dir type + if err := os.MkdirAll(filepath.Dir(abs), 0755); err != nil { + return fmt.Errorf("failed creating directory for file %s: %w", abs, err) + } + + wf, err := os.OpenFile(abs, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode.Perm()) + if err != nil { + return fmt.Errorf("failed creating file %s: %w", abs, err) + } + + _, err = io.Copy(wf, tr) //nolint:gosec // used only in tests + if closeErr := wf.Close(); closeErr != nil && err == nil { + err = closeErr + } + if err != nil { + return fmt.Errorf("error writing file %s: %w", abs, err) + } + case mode.IsDir(): + if err := os.MkdirAll(abs, 0755); err != nil { + return fmt.Errorf("failed creating directory %s: %w", abs, err) + } + case mode.Type()&os.ModeSymlink == os.ModeSymlink: + // just to be sure, it should already be created by Dir type + if err := os.MkdirAll(filepath.Dir(abs), 0755); err != nil { + return fmt.Errorf("failed creating directory for symlink %s: %w", abs, err) + } + if err := os.Symlink(f.Linkname, abs); err != nil { + return fmt.Errorf("failed creating symlink %s: %w", abs, err) + } + default: + // skip unknown types + } + } + return nil +} + +// unzip takes a .zip and extracts its content +func unzip(archivePath string, extractDir string) error { + r, err := zip.OpenReader(archivePath) + if err != nil { + return err + } + defer r.Close() + + unpackFile := func(f *zip.File) (err error) { + rc, err := f.Open() + if err != nil { + return err + } + defer func() { + if cerr := rc.Close(); cerr != nil { + err = errors.Join(err, cerr) + } + }() + + fi := f.FileInfo() + mode := fi.Mode() + abs := filepath.Join(extractDir, f.Name) //nolint:gosec // used only in tests + switch { + case mode.IsRegular(): + // just to be sure, it should already be created by Dir type + if err := os.MkdirAll(filepath.Dir(abs), f.Mode()); err != nil { + return fmt.Errorf("failed creating directory for file %s: %w", abs, err) + } + + f, err := os.OpenFile(abs, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode()) + if err != nil { + return 
fmt.Errorf("failed creating file %s: %w", abs, err) + } + defer func() { + if cerr := f.Close(); cerr != nil { + err = errors.Join(err, cerr) + } + }() + + //nolint:gosec // used only in tests + if _, err = io.Copy(f, rc); err != nil { + return fmt.Errorf("error writing file %s: %w", abs, err) + } + case mode.IsDir(): + if err := os.MkdirAll(abs, f.Mode()); err != nil { + return fmt.Errorf("failed creating directory %s: %w", abs, err) + } + default: + // skip unknown types + } + return nil + } + + for _, f := range r.File { + if err := unpackFile(f); err != nil { + return err + } + } + return nil +} diff --git a/dev-tools/mage/target/srvrlesstest/testing/kubernetes/image.go b/dev-tools/mage/target/srvrlesstest/testing/kubernetes/image.go new file mode 100644 index 000000000000..f15970402a7d --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/testing/kubernetes/image.go @@ -0,0 +1,258 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package kubernetes + +import ( + "archive/tar" + "bufio" + "bytes" + "context" + "encoding/json" + "fmt" + devtools "github.com/elastic/beats/v7/dev-tools/mage" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/common" + "io" + "os" + "path/filepath" + "strings" + + "github.com/docker/docker/api/types" + "github.com/docker/docker/client" +) + +type DockerConfig struct { + CurrentContext string `json:"currentContext"` +} + +type DockerContext struct { + Name string `json:"Name"` + Metadata map[string]interface{} `json:"Metadata"` + Endpoints map[string]Endpoint `json:"Endpoints"` + Storage map[string]interface{} `json:"Storage"` + TLS bool `json:"TLS"` +} + +type DockerBuildOutput struct { + Stream string `json:"stream"` + Aux struct { + ID string `json:"ID"` + } `json:"aux"` +} + +type Endpoint struct { + Host string `json:"Host"` +} + +// AddK8STestsToImage compiles and adds the k8s-inner-tests binary to the given image +func AddK8STestsToImage(ctx context.Context, logger common.Logger, baseImage string, arch string) (string, error) { + // compile k8s test with tag kubernetes_inner + buildBase, err := filepath.Abs("build") + if err != nil { + return "", err + } + + testBinary := filepath.Join(buildBase, "k8s-inner-tests") + + params := devtools.GoTestArgs{ + TestName: "k8s-inner-tests", + Race: false, + Packages: []string{"./testing/kubernetes_inner/..."}, + Tags: []string{"kubernetes_inner"}, + OutputFile: testBinary, + Env: map[string]string{ + "GOOS": "linux", + "GOARCH": arch, + "CGO_ENABLED": "0", + }, + } + + if err := devtools.GoTestBuild(ctx, params); err != nil { + return "", err + } + + cli, err := getDockerClient() + if err != nil { + return "", err + } + + // dockerfile to just copy the tests binary + dockerfile := fmt.Sprintf(` + FROM %s + COPY testsBinary 
/usr/share/elastic-agent/k8s-inner-tests + `, baseImage) + + // Create a tar archive with the Dockerfile and the binary + var buf bytes.Buffer + tw := tar.NewWriter(&buf) + + // Add Dockerfile to tar + err = tw.WriteHeader(&tar.Header{ + Name: "Dockerfile", + Size: int64(len(dockerfile)), + }) + if err != nil { + return "", err + } + _, err = tw.Write([]byte(dockerfile)) + if err != nil { + return "", err + } + + // Add binary to tar + binaryFile, err := os.Open(testBinary) + if err != nil { + return "", err + } + defer binaryFile.Close() + + info, err := binaryFile.Stat() + if err != nil { + return "", err + } + + err = tw.WriteHeader(&tar.Header{ + Name: "testsBinary", + Mode: 0777, + Size: info.Size(), + }) + if err != nil { + return "", err + } + _, err = io.Copy(tw, binaryFile) + if err != nil { + return "", err + } + + err = tw.Close() + if err != nil { + return "", err + } + + outputImage := baseImage + "-tests" + + // Build the image + imageBuildResponse, err := cli.ImageBuild(ctx, &buf, types.ImageBuildOptions{ + Tags: []string{outputImage}, + Dockerfile: "Dockerfile", + Remove: true, + }) + if err != nil { + return "", err + } + defer imageBuildResponse.Body.Close() + + scanner := bufio.NewScanner(imageBuildResponse.Body) + for scanner.Scan() { + line := scanner.Text() + var output DockerBuildOutput + if err := json.Unmarshal([]byte(line), &output); err != nil { + return "", fmt.Errorf("error at parsing JSON: %w", err) + } + + if output.Stream != "" { + if out := strings.TrimRight(output.Stream, "\n"); out != "" { + logger.Logf(out) + } + } + } + + if err := scanner.Err(); err != nil { + return "", err + } + + return outputImage, nil +} + +// getDockerClient returns an instance of the Docker client. It first checks +// if there is a current context inside $/.docker/config.json and instantiates +// a client based on it. Otherwise, it fallbacks to a docker client with values +// from environment variables. 
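+//
+// The only field read from config.json is assumed to look like:
+//
+//	{ "currentContext": "my-context" }
+//
+// where "my-context" is then resolved against the ~/.docker/contexts/meta
+// directory handled below.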
+func getDockerClient() (*client.Client, error) { + + envClient := func() (*client.Client, error) { + return client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation()) + } + + type DockerConfig struct { + CurrentContext string `json:"currentContext"` + } + + configFile := filepath.Join(os.Getenv("HOME"), ".docker", "config.json") + file, err := os.Open(configFile) + if err != nil { + if os.IsNotExist(err) { + return envClient() + } + return nil, err + } + defer file.Close() + + var config DockerConfig + decoder := json.NewDecoder(file) + err = decoder.Decode(&config) + if err != nil { + return nil, err + } + + if config.CurrentContext == "" { + return envClient() + } + + contextDir := filepath.Join(os.Getenv("HOME"), ".docker", "contexts", "meta") + files, err := os.ReadDir(contextDir) + if err != nil { + if os.IsNotExist(err) { + return envClient() + } + return nil, fmt.Errorf("unable to read Docker contexts directory: %w", err) + } + + for _, f := range files { + if f.IsDir() { + metaFile := filepath.Join(contextDir, f.Name(), "meta.json") + if _, err := os.Stat(metaFile); err == nil { + if os.IsNotExist(err) { + return envClient() + } + var dockerContext DockerContext + content, err := os.ReadFile(metaFile) + if err != nil { + return nil, fmt.Errorf("unable to read Docker context meta file: %w", err) + } + if err := json.Unmarshal(content, &dockerContext); err != nil { + return nil, fmt.Errorf("unable to parse Docker context meta file: %w", err) + } + if dockerContext.Name != config.CurrentContext { + continue + } + + endpoint, ok := dockerContext.Endpoints["docker"] + if !ok { + return nil, fmt.Errorf("docker endpoint not found in context") + } + + return client.NewClientWithOpts( + client.WithHost(endpoint.Host), + client.WithAPIVersionNegotiation(), + ) + } + } + } + + return envClient() +} diff --git a/dev-tools/mage/target/srvrlesstest/testing/kubernetes/kind/provisioner.go b/dev-tools/mage/target/srvrlesstest/testing/kubernetes/kind/provisioner.go new file mode 100644 index 000000000000..4769311941d8 --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/testing/kubernetes/kind/provisioner.go @@ -0,0 +1,298 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package kind + +import ( + "bytes" + "context" + "fmt" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/common" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/kubernetes" + "io" + "os" + "os/exec" + "runtime" + "strings" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/e2e-framework/klient" + "sigs.k8s.io/e2e-framework/klient/k8s" + "sigs.k8s.io/e2e-framework/klient/k8s/resources" + "sigs.k8s.io/e2e-framework/klient/wait" + "sigs.k8s.io/e2e-framework/klient/wait/conditions" +) + +const ( + Name = "kind" +) + +const clusterCfg string = ` +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: +- role: control-plane + kubeadmConfigPatches: + - | + kind: ClusterConfiguration + scheduler: + extraArgs: + bind-address: "0.0.0.0" + secure-port: "10259" + controllerManager: + extraArgs: + bind-address: "0.0.0.0" + secure-port: "10257" +` + +func NewProvisioner() common.InstanceProvisioner { + return &provisioner{} +} + +type provisioner struct { + logger common.Logger +} + +func (p *provisioner) Name() string { + return Name +} + +func (p *provisioner) Type() common.ProvisionerType { + return common.ProvisionerTypeK8SCluster +} + +func (p *provisioner) SetLogger(l common.Logger) { + p.logger = l +} + +func (p *provisioner) Supported(batch define.OS) bool { + if batch.Type != define.Kubernetes || batch.Arch != runtime.GOARCH { + return false + } + if batch.Distro != "" && batch.Distro != Name { + // not kind, don't run + return false + } + return true +} + +func (p *provisioner) Provision(ctx context.Context, cfg common.Config, batches []common.OSBatch) ([]common.Instance, error) { + var instances []common.Instance + for _, batch := range batches { + k8sVersion := fmt.Sprintf("v%s", batch.OS.Version) + instanceName := fmt.Sprintf("%s-%s", k8sVersion, batch.Batch.Group) + + agentImageName, err := kubernetes.VariantToImage(batch.OS.DockerVariant) + if err != nil { + return nil, err + } + agentImageName = fmt.Sprintf("%s:%s", agentImageName, cfg.AgentVersion) + agentImage, err := kubernetes.AddK8STestsToImage(ctx, p.logger, agentImageName, runtime.GOARCH) + if err != nil { + return nil, fmt.Errorf("failed to add k8s tests to image %s: %w", agentImageName, err) + } + + exists, err := p.clusterExists(instanceName) + if err != nil { + return nil, fmt.Errorf("failed to check if cluster exists: %w", err) + } + if !exists { + p.logger.Logf("Provisioning kind cluster %s", instanceName) + nodeImage := fmt.Sprintf("kindest/node:%s", k8sVersion) + clusterConfig := strings.NewReader(clusterCfg) + + ret, err := p.kindCmd(clusterConfig, "create", "cluster", "--name", instanceName, "--image", nodeImage, "--config", "-") + if err != nil { + return nil, fmt.Errorf("kind: failed to create cluster %s: %s", instanceName, ret.stderr) + } + + exists, err = p.clusterExists(instanceName) + if err != nil { + return nil, err + } + + if !exists { + return nil, fmt.Errorf("kind: failed to find cluster %s after successful creation", instanceName) + } + } else { + p.logger.Logf("Kind cluster %s already exists", instanceName) + } + + kConfigPath, err := p.writeKubeconfig(instanceName) + if err != nil { + return nil, err + } + + c, err := klient.NewWithKubeConfigFile(kConfigPath) + if err != nil { + return nil, err + } + + if err := p.WaitForControlPlane(c); err != nil { + return nil, err + } + + if err := p.LoadImage(ctx, instanceName, 
agentImage); err != nil { + return nil, err + } + + instances = append(instances, common.Instance{ + ID: batch.ID, + Name: instanceName, + Provisioner: Name, + IP: "", + Username: "", + RemotePath: "", + Internal: map[string]interface{}{ + "config": kConfigPath, + "version": k8sVersion, + "agent_image": agentImage, + }, + }) + } + + return instances, nil +} + +func (p *provisioner) LoadImage(ctx context.Context, clusterName string, image string) error { + ret, err := p.kindCmd(nil, "load", "docker-image", "--name", clusterName, image) + if err != nil { + return fmt.Errorf("kind: load docker-image %s failed: %w: %s", image, err, ret.stderr) + } + return nil +} + +func (p *provisioner) WaitForControlPlane(client klient.Client) error { + r, err := resources.New(client.RESTConfig()) + if err != nil { + return err + } + for _, sl := range []metav1.LabelSelectorRequirement{ + {Key: "component", Operator: metav1.LabelSelectorOpIn, Values: []string{"etcd", "kube-apiserver", "kube-controller-manager", "kube-scheduler"}}, + {Key: "k8s-app", Operator: metav1.LabelSelectorOpIn, Values: []string{"kindnet", "kube-dns", "kube-proxy"}}, + } { + selector, err := metav1.LabelSelectorAsSelector( + &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + sl, + }, + }, + ) + if err != nil { + return err + } + err = wait.For(conditions.New(r).ResourceListMatchN(&v1.PodList{}, len(sl.Values), func(object k8s.Object) bool { + pod, ok := object.(*v1.Pod) + if !ok { + return false + } + + for _, cond := range pod.Status.Conditions { + if cond.Type != v1.PodReady { + continue + } + + return cond.Status == v1.ConditionTrue + } + + return false + }, resources.WithLabelSelector(selector.String()))) + if err != nil { + return err + } + } + return nil +} + +func (p *provisioner) Clean(ctx context.Context, cfg common.Config, instances []common.Instance) error { + // doesn't execute in parallel for the same reasons in Provision + // multipass just cannot handle it + for _, instance := range instances { + func(instance common.Instance) { + err := p.deleteCluster(instance.ID) + if err != nil { + // prevent a failure from stopping the other instances and clean + p.logger.Logf("Delete instance %s failed: %s", instance.Name, err) + } + }(instance) + } + + return nil +} + +func (p *provisioner) clusterExists(name string) (bool, error) { + ret, err := p.kindCmd(nil, "get", "clusters") + if err != nil { + return false, err + } + + for _, c := range strings.Split(ret.stdout, "\n") { + if c == name { + return true, nil + } + } + return false, nil +} + +func (p *provisioner) writeKubeconfig(name string) (string, error) { + kubecfg := fmt.Sprintf("%s-kubecfg", name) + + ret, err := p.kindCmd(nil, "get", "kubeconfig", "--name", name) + if err != nil { + return "", fmt.Errorf("kind get kubeconfig: stderr: %s: %w", ret.stderr, err) + } + + file, err := os.CreateTemp("", fmt.Sprintf("kind-cluster-%s", kubecfg)) + if err != nil { + return "", fmt.Errorf("kind kubeconfig file: %w", err) + } + defer file.Close() + + if n, err := io.WriteString(file, ret.stdout); n == 0 || err != nil { + return "", fmt.Errorf("kind kubecfg file: bytes copied: %d: %w]", n, err) + } + + return file.Name(), nil +} + +type cmdResult struct { + stdout string + stderr string +} + +func (p *provisioner) kindCmd(stdIn io.Reader, args ...string) (cmdResult, error) { + + var stdout, stderr bytes.Buffer + cmd := exec.Command("kind", args...) 
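+ // capture stdout and stderr so callers can inspect them; stdin is wired
+ // up below so Provision can pipe the cluster config YAML in via "--config -"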
+ cmd.Stdout = &stdout + cmd.Stderr = &stderr + if stdIn != nil { + cmd.Stdin = stdIn + } + err := cmd.Run() + return cmdResult{ + stdout: stdout.String(), + stderr: stderr.String(), + }, err +} + +func (p *provisioner) deleteCluster(name string) error { + _, err := p.kindCmd(nil, "delete", "cluster", "--name", name) + return err +} diff --git a/dev-tools/mage/target/srvrlesstest/testing/kubernetes/runner.go b/dev-tools/mage/target/srvrlesstest/testing/kubernetes/runner.go new file mode 100644 index 000000000000..6c5e7641b307 --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/testing/kubernetes/runner.go @@ -0,0 +1,135 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package kubernetes + +import ( + "context" + "errors" + "fmt" + devtools "github.com/elastic/beats/v7/dev-tools/mage" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/common" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/ssh" + "os" + "path/filepath" + "strings" + "time" +) + +// Runner is a handler for running tests against a Kubernetes cluster +type Runner struct{} + +// Prepare configures the host for running the test +func (Runner) Prepare(ctx context.Context, sshClient ssh.SSHClient, logger common.Logger, arch string, goVersion string) error { + return nil +} + +// Copy places the required files on the host +func (Runner) Copy(ctx context.Context, sshClient ssh.SSHClient, logger common.Logger, repoArchive string, builds []common.Build) error { + return nil +} + +// Run the test +func (Runner) Run(ctx context.Context, verbose bool, sshClient ssh.SSHClient, logger common.Logger, agentVersion string, prefix string, batch define.Batch, env map[string]string) (common.OSRunnerResult, error) { + var goTestFlags []string + rawTestFlags := os.Getenv("GOTEST_FLAGS") + if rawTestFlags != "" { + goTestFlags = strings.Split(rawTestFlags, " ") + } + + maxDuration := 2 * time.Hour + var result []common.OSRunnerPackageResult + for _, pkg := range batch.Tests { + packageTestsStrBuilder := strings.Builder{} + packageTestsStrBuilder.WriteString("^(") + for idx, test := range pkg.Tests { + if idx > 0 { + packageTestsStrBuilder.WriteString("|") + } + packageTestsStrBuilder.WriteString(test.Name) + } + packageTestsStrBuilder.WriteString(")$") + + testPrefix := fmt.Sprintf("%s.%s", prefix, filepath.Base(pkg.Name)) + testName := fmt.Sprintf("k8s-%s", testPrefix) + fileName := fmt.Sprintf("build/TEST-go-%s", testName) + extraFlags := make([]string, 0, len(goTestFlags)+6) + if len(goTestFlags) > 0 { + extraFlags = append(extraFlags, goTestFlags...) 
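+ // note: GOTEST_FLAGS was split on single spaces above, so quoted flags
+ // that contain spaces are not supported here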
+ }
+ extraFlags = append(extraFlags, "-test.shuffle", "on",
+ "-test.timeout", maxDuration.String(), "-test.run", packageTestsStrBuilder.String())
+
+ env["AGENT_VERSION"] = agentVersion
+ env["TEST_DEFINE_PREFIX"] = testPrefix
+
+ buildFolderAbsPath, err := filepath.Abs("build")
+ if err != nil {
+ return common.OSRunnerResult{}, err
+ }
+
+ podLogsPath := filepath.Join(buildFolderAbsPath, fmt.Sprintf("k8s-logs-%s", testPrefix))
+ err = os.Mkdir(podLogsPath, 0755)
+ if err != nil && !errors.Is(err, os.ErrExist) {
+ return common.OSRunnerResult{}, err
+ }
+
+ env["K8S_TESTS_POD_LOGS_BASE"] = podLogsPath
+
+ params := devtools.GoTestArgs{
+ TestName: testName,
+ OutputFile: fileName + ".out",
+ JUnitReportFile: fileName + ".xml",
+ Packages: []string{pkg.Name},
+ Tags: []string{"integration", "kubernetes"},
+ ExtraFlags: extraFlags,
+ Env: env,
+ }
+ err = devtools.GoTest(ctx, params)
+ if err != nil {
+ return common.OSRunnerResult{}, err
+ }
+
+ var resultPkg common.OSRunnerPackageResult
+ resultPkg.Name = pkg.Name
+ outputPath := fmt.Sprintf("build/TEST-go-k8s-%s.%s", prefix, filepath.Base(pkg.Name))
+ resultPkg.Output, err = os.ReadFile(outputPath + ".out")
+ if err != nil {
+ return common.OSRunnerResult{}, fmt.Errorf("failed to fetch test output at %s.out: %w", outputPath, err)
+ }
+ resultPkg.JSONOutput, err = os.ReadFile(outputPath + ".out.json")
+ if err != nil {
+ return common.OSRunnerResult{}, fmt.Errorf("failed to fetch test output at %s.out.json: %w", outputPath, err)
+ }
+ resultPkg.XMLOutput, err = os.ReadFile(outputPath + ".xml")
+ if err != nil {
+ return common.OSRunnerResult{}, fmt.Errorf("failed to fetch test output at %s.xml: %w", outputPath, err)
+ }
+ result = append(result, resultPkg)
+ }
+
+ return common.OSRunnerResult{
+ Packages: result,
+ }, nil
+}
+
+// Diagnostics gathers any diagnostics from the host.
+func (Runner) Diagnostics(ctx context.Context, sshClient ssh.SSHClient, logger common.Logger, destination string) error {
+ // does nothing for kubernetes
+ return nil
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/kubernetes/supported.go b/dev-tools/mage/target/srvrlesstest/testing/kubernetes/supported.go
new file mode 100644
index 000000000000..125e7f92fe79
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/kubernetes/supported.go
@@ -0,0 +1,117 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package kubernetes
+
+import (
+ "errors"
+ "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define"
+)
+
+// ErrUnknownDockerVariant is the error returned when the variant is unknown.
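+// It is returned by VariantToImage for any variant name that is not in the
+// variants table below; callers can match it with errors.Is.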
+var ErrUnknownDockerVariant = errors.New("unknown docker variant type") + +// arches defines the list of supported architectures of Kubernetes +var arches = []string{define.AMD64, define.ARM64} + +// versions defines the list of supported version of Kubernetes. +var versions = []define.OS{ + // Kubernetes 1.31 + { + Type: define.Kubernetes, + Version: "1.31.0", + }, + // Kubernetes 1.30 + { + Type: define.Kubernetes, + Version: "1.30.2", + }, + // Kubernetes 1.29 + { + Type: define.Kubernetes, + Version: "1.29.4", + }, + // Kubernetes 1.28 + { + Type: define.Kubernetes, + Version: "1.28.9", + }, +} + +// variants defines the list of variants and the image name for that variant. +// +// Note: This cannot be a simple map as the order matters. We need the +// one that we want to be the default test to be first. +var variants = []struct { + Name string + Image string +}{ + { + Name: "basic", + Image: "docker.elastic.co/beats/elastic-agent", + }, + { + Name: "ubi", + Image: "docker.elastic.co/beats/elastic-agent-ubi", + }, + { + Name: "wolfi", + Image: "docker.elastic.co/beats/elastic-agent-wolfi", + }, + { + Name: "complete", + Image: "docker.elastic.co/beats/elastic-agent-complete", + }, + { + Name: "complete-wolfi", + Image: "docker.elastic.co/beats/elastic-agent-complete-wolfi", + }, + { + Name: "cloud", + Image: "docker.elastic.co/beats-ci/elastic-agent-cloud", + }, + { + Name: "service", + Image: "docker.elastic.co/beats-ci/elastic-agent-service", + }, +} + +// GetSupported returns the list of supported OS types for Kubernetes. +func GetSupported() []define.OS { + supported := make([]define.OS, 0, len(versions)*len(variants)*2) + for _, a := range arches { + for _, v := range versions { + for _, variant := range variants { + c := v + c.Arch = a + c.DockerVariant = variant.Name + supported = append(supported, c) + } + } + } + return supported +} + +// VariantToImage returns the image name from the variant. +func VariantToImage(variant string) (string, error) { + for _, v := range variants { + if v.Name == variant { + return v.Image, nil + } + } + return "", ErrUnknownDockerVariant +} diff --git a/dev-tools/mage/target/srvrlesstest/testing/linux/debian.go b/dev-tools/mage/target/srvrlesstest/testing/linux/debian.go new file mode 100644 index 000000000000..acecdb7969af --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/testing/linux/debian.go @@ -0,0 +1,219 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package linux + +import ( + "context" + "fmt" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/common" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/ssh" + "path" + "path/filepath" + "strings" + "time" +) + +// DebianRunner is a handler for running tests on Linux +type DebianRunner struct{} + +// Prepare the test +func (DebianRunner) Prepare(ctx context.Context, sshClient ssh.SSHClient, logger common.Logger, arch string, goVersion string) error { + // prepare build-essential and unzip + // + // apt-get update and install are so terrible that we have to place this in a loop, because in some cases the + // apt-get update says it works, but it actually fails. so we add 3 tries here + var err error + for i := 0; i < 3; i++ { + err = func() error { + updateCtx, updateCancel := context.WithTimeout(ctx, 3*time.Minute) + defer updateCancel() + logger.Logf("Running apt-get update") + // `-o APT::Update::Error-Mode=any` ensures that any warning is tried as an error, so the retry + // will occur (without this we get random failures) + stdOut, errOut, err := sshClient.ExecWithRetry(updateCtx, "sudo", []string{"apt-get", "update", "-o APT::Update::Error-Mode=any"}, 15*time.Second) + if err != nil { + return fmt.Errorf("failed to run apt-get update: %w (stdout: %s, stderr: %s)", err, stdOut, errOut) + } + return func() error { + // golang is installed below and not using the package manager, ensures that the exact version + // of golang is used for the running of the test + installCtx, installCancel := context.WithTimeout(ctx, 1*time.Minute) + defer installCancel() + logger.Logf("Install build-essential and unzip") + stdOut, errOut, err = sshClient.ExecWithRetry(installCtx, "sudo", []string{"apt-get", "install", "-y", "build-essential", "unzip"}, 5*time.Second) + if err != nil { + return fmt.Errorf("failed to install build-essential and unzip: %w (stdout: %s, stderr: %s)", err, stdOut, errOut) + } + return nil + }() + }() + if err == nil { + // installation was successful + break + } + logger.Logf("Failed to install build-essential and unzip; will wait 15 seconds and try again") + <-time.After(15 * time.Second) + } + if err != nil { + // seems after 3 tries it still failed + return err + } + + // prepare golang + logger.Logf("Install golang %s (%s)", goVersion, arch) + downloadURL := fmt.Sprintf("https://go.dev/dl/go%s.linux-%s.tar.gz", goVersion, arch) + filename := path.Base(downloadURL) + stdOut, errOut, err := sshClient.Exec(ctx, "curl", []string{"-Ls", downloadURL, "--output", filename}, nil) + if err != nil { + return fmt.Errorf("failed to download go from %s with curl: %w (stdout: %s, stderr: %s)", downloadURL, err, stdOut, errOut) + } + stdOut, errOut, err = sshClient.Exec(ctx, "sudo", []string{"tar", "-C", "/usr/local", "-xzf", filename}, nil) + if err != nil { + return fmt.Errorf("failed to extract go to /usr/local with tar: %w (stdout: %s, stderr: %s)", err, stdOut, errOut) + } + stdOut, errOut, err = sshClient.Exec(ctx, "sudo", []string{"ln", "-s", "/usr/local/go/bin/go", "/usr/bin/go"}, nil) + if err != nil { + return fmt.Errorf("failed to symlink /usr/local/go/bin/go to /usr/bin/go: %w (stdout: %s, stderr: %s)", err, stdOut, errOut) + } + stdOut, errOut, err = sshClient.Exec(ctx, "sudo", []string{"ln", "-s", "/usr/local/go/bin/gofmt", "/usr/bin/gofmt"}, nil) + if err != nil { + return fmt.Errorf("failed to symlink /usr/local/go/bin/gofmt to 
/usr/bin/gofmt: %w (stdout: %s, stderr: %s)", err, stdOut, errOut) + } + + return nil +} + +// Copy places the required files on the host. +func (DebianRunner) Copy(ctx context.Context, sshClient ssh.SSHClient, logger common.Logger, repoArchive string, builds []common.Build) error { + return linuxCopy(ctx, sshClient, logger, repoArchive, builds) +} + +// Run the test +func (DebianRunner) Run(ctx context.Context, verbose bool, sshClient ssh.SSHClient, logger common.Logger, agentVersion string, prefix string, batch define.Batch, env map[string]string) (common.OSRunnerResult, error) { + var tests []string + for _, pkg := range batch.Tests { + for _, test := range pkg.Tests { + tests = append(tests, fmt.Sprintf("%s:%s", pkg.Name, test.Name)) + } + } + var sudoTests []string + for _, pkg := range batch.SudoTests { + for _, test := range pkg.Tests { + sudoTests = append(sudoTests, fmt.Sprintf("%s:%s", pkg.Name, test.Name)) + } + } + + logArg := "" + if verbose { + logArg = "-v" + } + var result common.OSRunnerResult + if len(tests) > 0 { + vars := fmt.Sprintf(`GOPATH="$HOME/go" PATH="$HOME/go/bin:$PATH" AGENT_VERSION="%s" TEST_DEFINE_PREFIX="%s" TEST_DEFINE_TESTS="%s"`, agentVersion, prefix, strings.Join(tests, ",")) + vars = extendVars(vars, env) + + script := fmt.Sprintf(`cd agent && %s ~/go/bin/mage %s integration:testOnRemote`, vars, logArg) + results, err := runTests(ctx, logger, "non-sudo", prefix, script, sshClient, batch.Tests) + if err != nil { + return common.OSRunnerResult{}, fmt.Errorf("error running non-sudo tests: %w", err) + } + result.Packages = results + } + + if len(sudoTests) > 0 { + prefix := fmt.Sprintf("%s-sudo", prefix) + vars := fmt.Sprintf(`GOPATH="$HOME/go" PATH="$HOME/go/bin:$PATH" AGENT_VERSION="%s" TEST_DEFINE_PREFIX="%s" TEST_DEFINE_TESTS="%s"`, agentVersion, prefix, strings.Join(sudoTests, ",")) + vars = extendVars(vars, env) + script := fmt.Sprintf(`cd agent && sudo %s ~/go/bin/mage %s integration:testOnRemote`, vars, logArg) + + results, err := runTests(ctx, logger, "sudo", prefix, script, sshClient, batch.SudoTests) + if err != nil { + return common.OSRunnerResult{}, fmt.Errorf("error running sudo tests: %w", err) + } + result.SudoPackages = results + } + + return result, nil +} + +// Diagnostics gathers any diagnostics from the host. 
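+// Diagnostic files are read from $HOME/agent/build/diagnostics on the remote
+// host and written into the local destination directory.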
+func (DebianRunner) Diagnostics(ctx context.Context, sshClient ssh.SSHClient, logger common.Logger, destination string) error {
+ return linuxDiagnostics(ctx, sshClient, logger, destination)
+}
+
+func runTests(ctx context.Context, logger common.Logger, name string, prefix string, script string, sshClient ssh.SSHClient, tests []define.BatchPackageTests) ([]common.OSRunnerPackageResult, error) {
+ execTest := strings.NewReader(script)
+
+ session, err := sshClient.NewSession()
+ if err != nil {
+ return nil, fmt.Errorf("failed to start session: %w", err)
+ }
+
+ session.Stdout = common.NewPrefixOutput(logger, fmt.Sprintf("Test output (%s) (stdout): ", name))
+ session.Stderr = common.NewPrefixOutput(logger, fmt.Sprintf("Test output (%s) (stderr): ", name))
+ session.Stdin = execTest
+
+ // allowed to fail because tests might fail
+ logger.Logf("Running %s tests...", name)
+ err = session.Run("bash")
+ if err != nil {
+ logger.Logf("%s tests failed: %s", name, err)
+ }
+ // this seems to always return an error
+ _ = session.Close()
+
+ var result []common.OSRunnerPackageResult
+ // fetch the contents for each package
+ for _, pkg := range tests {
+ resultPkg, err := getRunnerPackageResult(ctx, sshClient, pkg, prefix)
+ if err != nil {
+ return nil, err
+ }
+ result = append(result, resultPkg)
+ }
+ return result, nil
+}
+
+func getRunnerPackageResult(ctx context.Context, sshClient ssh.SSHClient, pkg define.BatchPackageTests, prefix string) (common.OSRunnerPackageResult, error) {
+ var err error
+ var resultPkg common.OSRunnerPackageResult
+ resultPkg.Name = pkg.Name
+ outputPath := fmt.Sprintf("$HOME/agent/build/TEST-go-remote-%s.%s", prefix, filepath.Base(pkg.Name))
+ resultPkg.Output, err = sshClient.GetFileContents(ctx, outputPath+".out")
+ if err != nil {
+ return common.OSRunnerPackageResult{}, fmt.Errorf("failed to fetch test output at %s.out: %w", outputPath, err)
+ }
+ resultPkg.JSONOutput, err = sshClient.GetFileContents(ctx, outputPath+".out.json")
+ if err != nil {
+ return common.OSRunnerPackageResult{}, fmt.Errorf("failed to fetch test output at %s.out.json: %w", outputPath, err)
+ }
+ resultPkg.XMLOutput, err = sshClient.GetFileContents(ctx, outputPath+".xml")
+ if err != nil {
+ return common.OSRunnerPackageResult{}, fmt.Errorf("failed to fetch test output at %s.xml: %w", outputPath, err)
+ }
+ return resultPkg, nil
+}
+
+func extendVars(vars string, env map[string]string) string {
+ var envStr []string
+ for k, v := range env {
+ envStr = append(envStr, fmt.Sprintf(`%s="%s"`, k, v))
+ }
+ return fmt.Sprintf("%s %s", vars, strings.Join(envStr, " "))
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/linux/linux.go b/dev-tools/mage/target/srvrlesstest/testing/linux/linux.go
new file mode 100644
index 000000000000..2aa9564e5068
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/linux/linux.go
@@ -0,0 +1,169 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package linux + +import ( + "context" + "fmt" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/common" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/ssh" + "os" + "path/filepath" + "strings" +) + +func linuxDiagnostics(ctx context.Context, sshClient ssh.SSHClient, logger common.Logger, destination string) error { + // take ownership, as sudo tests will create with root permissions (allow to fail in the case it doesn't exist) + diagnosticDir := "$HOME/agent/build/diagnostics" + _, _, _ = sshClient.Exec(ctx, "sudo", []string{"chown", "-R", "$USER:$USER", diagnosticDir}, nil) + stdOut, _, err := sshClient.Exec(ctx, "ls", []string{"-1", diagnosticDir}, nil) + if err != nil { + //nolint:nilerr // failed to list the directory, probably don't have any diagnostics (do nothing) + return nil + } + eachDiagnostic := strings.Split(string(stdOut), "\n") + for _, filename := range eachDiagnostic { + filename = strings.TrimSpace(filename) + if filename == "" { + continue + } + + // don't use filepath.Join as we need this to work in Windows as well + // this is because if we use `filepath.Join` on a Windows host connected to a Linux host + // it will use a `\` and that will be incorrect for Linux + fp := fmt.Sprintf("%s/%s", diagnosticDir, filename) + // use filepath.Join on this path because it's a path on this specific host platform + dp := filepath.Join(destination, filename) + logger.Logf("Copying diagnostic %s", filename) + out, err := os.Create(dp) + if err != nil { + return fmt.Errorf("failed to create file %s: %w", dp, err) + } + err = sshClient.GetFileContentsOutput(ctx, fp, out) + _ = out.Close() + if err != nil { + return fmt.Errorf("failed to copy file from remote host to %s: %w", dp, err) + } + } + return nil +} + +func linuxCopy(ctx context.Context, sshClient ssh.SSHClient, logger common.Logger, repoArchive string, builds []common.Build) error { + // copy the archive and extract it on the host + logger.Logf("Copying repo") + destRepoName := filepath.Base(repoArchive) + err := sshClient.Copy(repoArchive, destRepoName) + if err != nil { + return fmt.Errorf("failed to SCP repo archive %s: %w", repoArchive, err) + } + + // remove build paths, on cases where the build path is different from agent. + for _, build := range builds { + for _, remoteBuildPath := range []string{build.Path, build.SHA512Path} { + relativeAgentDir := filepath.Join("agent", remoteBuildPath) + _, _, err := sshClient.Exec(ctx, "sudo", []string{"rm", "-rf", relativeAgentDir}, nil) + // doesn't need to be a fatal error. + if err != nil { + logger.Logf("error removing build dir %s: %w", relativeAgentDir, err) + } + } + } + + // ensure that agent directory is removed (possible it already exists if instance already used) + stdout, stderr, err := sshClient.Exec(ctx, + "sudo", []string{"rm", "-rf", "agent"}, nil) + if err != nil { + return fmt.Errorf( + "failed to remove agent directory before unziping new one: %w. 
+func linuxCopy(ctx context.Context, sshClient ssh.SSHClient, logger common.Logger, repoArchive string, builds []common.Build) error {
+	// copy the archive and extract it on the host
+	logger.Logf("Copying repo")
+	destRepoName := filepath.Base(repoArchive)
+	err := sshClient.Copy(repoArchive, destRepoName)
+	if err != nil {
+		return fmt.Errorf("failed to SCP repo archive %s: %w", repoArchive, err)
+	}
+
+	// remove build paths, in cases where the build path is different from agent.
+	for _, build := range builds {
+		for _, remoteBuildPath := range []string{build.Path, build.SHA512Path} {
+			relativeAgentDir := filepath.Join("agent", remoteBuildPath)
+			_, _, err := sshClient.Exec(ctx, "sudo", []string{"rm", "-rf", relativeAgentDir}, nil)
+			// doesn't need to be a fatal error.
+			if err != nil {
+				logger.Logf("error removing build dir %s: %v", relativeAgentDir, err)
+			}
+		}
+	}
+
+	// ensure that agent directory is removed (possible it already exists if instance already used)
+	stdout, stderr, err := sshClient.Exec(ctx,
+		"sudo", []string{"rm", "-rf", "agent"}, nil)
+	if err != nil {
+		return fmt.Errorf(
+			"failed to remove agent directory before unzipping new one: %w. stdout: %q, stderr: %q",
+			err, stdout, stderr)
+	}
+
+	stdOut, errOut, err := sshClient.Exec(ctx, "unzip", []string{destRepoName, "-d", "agent"}, nil)
+	if err != nil {
+		return fmt.Errorf("failed to unzip %s to agent directory: %w (stdout: %s, stderr: %s)", destRepoName, err, stdOut, errOut)
+	}
+
+	// prepare for testing
+	logger.Logf("Running make mage and prepareOnRemote")
+	envs := `GOPATH="$HOME/go" PATH="$HOME/go/bin:$PATH"`
+	installMage := strings.NewReader(fmt.Sprintf(`cd agent && %s make mage && %s mage integration:prepareOnRemote`, envs, envs))
+	stdOut, errOut, err = sshClient.Exec(ctx, "bash", nil, installMage)
+	if err != nil {
+		return fmt.Errorf("failed to perform make mage and prepareOnRemote: %w (stdout: %s, stderr: %s)", err, stdOut, errOut)
+	}
+
+	// determine if the build needs to be replaced on the host
+	// if it already exists and the SHA512 contents are the same, then
+	// there is no reason to waste time uploading the build
+	for _, build := range builds {
+		copyBuild := true
+		localSHA512, err := os.ReadFile(build.SHA512Path)
+		if err != nil {
+			return fmt.Errorf("failed to read local SHA512 contents %s: %w", build.SHA512Path, err)
+		}
+		hostSHA512Path := filepath.Base(build.SHA512Path)
+		hostSHA512, err := sshClient.GetFileContents(ctx, hostSHA512Path)
+		if err == nil {
+			if string(localSHA512) == string(hostSHA512) {
+				logger.Logf("Skipping copy agent build %s; already the same", filepath.Base(build.Path))
+				copyBuild = false
+			}
+		}
+
+		if copyBuild {
+			// ensure the existing copies are removed first
+			toRemove := filepath.Base(build.Path)
+			stdOut, errOut, err = sshClient.Exec(ctx,
+				"sudo", []string{"rm", "-f", toRemove}, nil)
+			if err != nil {
+				return fmt.Errorf("failed to remove %q: %w (stdout: %q, stderr: %q)",
+					toRemove, err, stdOut, errOut)
+			}
+
+			toRemove = filepath.Base(build.SHA512Path)
+			stdOut, errOut, err = sshClient.Exec(ctx,
+				"sudo", []string{"rm", "-f", toRemove}, nil)
+			if err != nil {
+				return fmt.Errorf("failed to remove %q: %w (stdout: %q, stderr: %q)",
+					toRemove, err, stdOut, errOut)
+			}
+
+			logger.Logf("Copying agent build %s", filepath.Base(build.Path))
+		}
+
+		for _, buildPath := range []string{build.Path, build.SHA512Path} {
+			if copyBuild {
+				err = sshClient.Copy(buildPath, filepath.Base(buildPath))
+				if err != nil {
+					return fmt.Errorf("failed to SCP build %s: %w", filepath.Base(buildPath), err)
+				}
+			}
+			insideAgentDir := filepath.Join("agent", buildPath)
+			stdOut, errOut, err = sshClient.Exec(ctx, "mkdir", []string{"-p", filepath.Dir(insideAgentDir)}, nil)
+			if err != nil {
+				return fmt.Errorf("failed to create %s directory: %w (stdout: %s, stderr: %s)", filepath.Dir(insideAgentDir), err, stdOut, errOut)
+			}
+			stdOut, errOut, err = sshClient.Exec(ctx, "ln", []string{filepath.Base(buildPath), insideAgentDir}, nil)
+			if err != nil {
+				return fmt.Errorf("failed to hard link %s to %s: %w (stdout: %s, stderr: %s)", filepath.Base(buildPath), insideAgentDir, err, stdOut, errOut)
+			}
+		}
+	}
+
+	return nil
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/linux/rhel.go b/dev-tools/mage/target/srvrlesstest/testing/linux/rhel.go
new file mode 100644
index 000000000000..f8d0bb1f6d26
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/linux/rhel.go
@@ -0,0 +1,126 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package linux
+
+import (
+	"context"
+	"fmt"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/common"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/ssh"
+	"path"
+	"strings"
+	"time"
+)
+
+// RhelRunner is a handler for running tests on Red Hat Enterprise Linux (RHEL)
+type RhelRunner struct{}
+
+// Prepare configures the host for running the test
+func (RhelRunner) Prepare(ctx context.Context, sshClient ssh.SSHClient, logger common.Logger, arch string, goVersion string) error {
+	logger.Logf("Install development tools")
+	dnfCtx, dnfCancel := context.WithTimeout(ctx, 20*time.Minute)
+	defer dnfCancel()
+	stdOut, errOut, err := sshClient.ExecWithRetry(dnfCtx, "sudo", []string{"dnf", "-y", "-v", "group", "install", "\"Development Tools\""}, 15*time.Second)
+	if err != nil {
+		return fmt.Errorf("failed to run 'dnf group install \"Development Tools\"': %w (stdout: %s, stderr: %s)", err, stdOut, errOut)
+	}
+
+	// install golang
+	logger.Logf("Install golang %s (%s)", goVersion, arch)
+	goCtx, goCancel := context.WithTimeout(ctx, 20*time.Minute)
+	defer goCancel()
+	downloadURL := fmt.Sprintf("https://go.dev/dl/go%s.linux-%s.tar.gz", goVersion, arch)
+	filename := path.Base(downloadURL)
+	stdOut, errOut, err = sshClient.Exec(goCtx, "curl", []string{"-Ls", downloadURL, "--output", filename}, nil)
+	if err != nil {
+		return fmt.Errorf("failed to download go from %s with curl: %w (stdout: %s, stderr: %s)", downloadURL, err, stdOut, errOut)
+	}
+	stdOut, errOut, err = sshClient.Exec(goCtx, "sudo", []string{"tar", "-C", "/usr/local", "-xzf", filename}, nil)
+	if err != nil {
+		return fmt.Errorf("failed to extract go to /usr/local with tar: %w (stdout: %s, stderr: %s)", err, stdOut, errOut)
+	}
+	stdOut, errOut, err = sshClient.Exec(goCtx, "sudo", []string{"ln", "-s", "/usr/local/go/bin/go", "/usr/bin/go"}, nil)
+	if err != nil {
+		return fmt.Errorf("failed to symlink /usr/local/go/bin/go to /usr/bin/go: %w (stdout: %s, stderr: %s)", err, stdOut, errOut)
+	}
+	stdOut, errOut, err = sshClient.Exec(goCtx, "sudo", []string{"ln", "-s", "/usr/local/go/bin/gofmt", "/usr/bin/gofmt"}, nil)
+	if err != nil {
+		return fmt.Errorf("failed to symlink /usr/local/go/bin/gofmt to /usr/bin/gofmt: %w (stdout: %s, stderr: %s)", err, stdOut, errOut)
+	}
+
+	return nil
+}
+
+// Copy places the required files on the host
+func (RhelRunner) Copy(ctx context.Context, sshClient ssh.SSHClient, logger common.Logger, repoArchive string, builds []common.Build) error {
+	return linuxCopy(ctx, sshClient, logger, repoArchive, builds)
+}
+
+// Run the test
+func (RhelRunner) Run(ctx context.Context, verbose bool, sshClient ssh.SSHClient, logger common.Logger, agentVersion string, prefix string, batch define.Batch, env map[string]string) (common.OSRunnerResult, error) {
+	var tests []string
+	for _, pkg := range batch.Tests {
+		for _, test := range pkg.Tests {
+			tests = append(tests, fmt.Sprintf("%s:%s", pkg.Name, test.Name))
+		}
+	}
+	var sudoTests []string
+	for _, pkg := range batch.SudoTests {
+		for _, test := range pkg.Tests {
+			sudoTests = append(sudoTests, fmt.Sprintf("%s:%s", pkg.Name, test.Name))
+		}
+	}
+
+	logArg := ""
+	if verbose {
+		logArg = "-v"
+	}
+	var result common.OSRunnerResult
+	if len(tests) > 0 {
+		vars := fmt.Sprintf(`GOPATH="$HOME/go" PATH="$HOME/go/bin:$PATH" AGENT_VERSION="%s" TEST_DEFINE_PREFIX="%s" TEST_DEFINE_TESTS="%s"`, agentVersion, prefix, strings.Join(tests, ","))
+		vars = extendVars(vars, env)
+
+		script := fmt.Sprintf(`cd agent && %s ~/go/bin/mage %s integration:testOnRemote`, vars, logArg)
+		results, err := runTests(ctx, logger, "non-sudo", prefix, script, sshClient, batch.Tests)
+		if err != nil {
+			return common.OSRunnerResult{}, fmt.Errorf("error running non-sudo tests: %w", err)
+		}
+		result.Packages = results
+	}
+
+	if len(sudoTests) > 0 {
+		prefix := fmt.Sprintf("%s-sudo", prefix)
+		vars := fmt.Sprintf(`GOPATH="$HOME/go" PATH="$HOME/go/bin:$PATH:/usr/sbin" AGENT_VERSION="%s" TEST_DEFINE_PREFIX="%s" TEST_DEFINE_TESTS="%s"`, agentVersion, prefix, strings.Join(sudoTests, ","))
+		vars = extendVars(vars, env)
+		script := fmt.Sprintf(`cd agent && sudo %s ~/go/bin/mage %s integration:testOnRemote`, vars, logArg)
+
+		results, err := runTests(ctx, logger, "sudo", prefix, script, sshClient, batch.SudoTests)
+		if err != nil {
+			return common.OSRunnerResult{}, fmt.Errorf("error running sudo tests: %w", err)
+		}
+		result.SudoPackages = results
+	}
+
+	return result, nil
+}
+
+// Diagnostics gathers any diagnostics from the host.
+func (RhelRunner) Diagnostics(ctx context.Context, sshClient ssh.SSHClient, logger common.Logger, destination string) error {
+	return linuxDiagnostics(ctx, sshClient, logger, destination)
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/log.go b/dev-tools/mage/target/srvrlesstest/testing/log.go
new file mode 100644
index 000000000000..eeedc57fc88b
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/log.go
@@ -0,0 +1,157 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package testing
+
+import (
+	"bytes"
+	"encoding/json"
+	"errors"
+	"strings"
+
+	"github.com/elastic/elastic-agent-libs/logp"
+)
+
+// Logger is a log interface that matches *testing.T.
+type Logger interface {
+	// Log logs the arguments.
+	Log(args ...any)
+	// Logf logs the formatted arguments.
+	Logf(format string, args ...any)
+}
+
+// logWatcher is an `io.Writer` that processes the log lines emitted by the spawned Elastic Agent.
+//
+// `Write` handles parsing lines as either ndjson or plain text.
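+//
+// An error-level ndjson line such as (illustrative):
+//
+//	{"log.level":"error","message":"failed to start"}
+//
+// is surfaced on the channel returned by Watch; anything else is only
+// replicated to the wrapped Logger.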
+type logWatcher struct {
+	remainder []byte
+	replicate Logger
+	alert     chan error
+}
+
+func newLogWatcher(replicate Logger) *logWatcher {
+	return &logWatcher{
+		replicate: replicate,
+		alert:     make(chan error),
+	}
+}
+
+// Watch returns the channel that will get an error when an error is identified from the log.
+func (r *logWatcher) Watch() <-chan error {
+	return r.alert
+}
+
+// Write implements the `io.Writer` interface.
+func (r *logWatcher) Write(p []byte) (int, error) {
+	if len(p) == 0 {
+		// nothing to do
+		return 0, nil
+	}
+	offset := 0
+	for {
+		idx := bytes.IndexByte(p[offset:], '\n')
+		if idx < 0 {
+			// no complete line yet; keep the remainder for the next call
+			r.remainder = append(r.remainder, p[offset:]...)
+			return len(p), nil
+		}
+
+		var line []byte
+		if r.remainder != nil {
+			line = r.remainder
+			r.remainder = nil
+			line = append(line, p[offset:offset+idx]...)
+		} else {
+			line = append(line, p[offset:offset+idx]...)
+		}
+		offset += idx + 1
+		// drop '\r' from line (needed for Windows)
+		if len(line) > 0 && line[len(line)-1] == '\r' {
+			line = line[0 : len(line)-1]
+		}
+		if len(line) == 0 {
+			// empty line
+			continue
+		}
+		str := strings.TrimSpace(string(line))
+		if str == "" {
+			// whitespace-only line; skip so the '{' check below cannot panic
+			continue
+		}
+		// try to parse line as JSON
+		if str[0] == '{' && r.handleJSON(str) {
+			// handled as JSON
+			continue
+		}
+		// not JSON; treat it as plain text and just replicate
+		if r.replicate != nil {
+			r.replicate.Log(str)
+		}
+	}
+}
+
+func (r *logWatcher) handleJSON(line string) bool {
+	var evt map[string]interface{}
+	if err := json.Unmarshal([]byte(line), &evt); err != nil {
+		return false
+	}
+	if r.replicate != nil {
+		r.replicate.Log(line)
+	}
+	lvl := getLevel(evt, "log.level")
+	msg := getMessage(evt, "message")
+	if lvl == logp.ErrorLevel {
+		r.alert <- errors.New(msg)
+	}
+	return true
+}
+
+func getLevel(evt map[string]interface{}, key string) logp.Level {
+	lvl := logp.InfoLevel
+	err := unmarshalLevel(&lvl, getStrVal(evt, key))
+	if err == nil {
+		delete(evt, key)
+	}
+	return lvl
+}
+
+func unmarshalLevel(lvl *logp.Level, val string) error {
+	if val == "" {
+		return errors.New("empty val")
+	} else if val == "trace" {
+		// logp doesn't handle the trace level; map it to debug
+		*lvl = logp.DebugLevel
+		return nil
+	}
+	return lvl.Unpack(val)
+}
+
+func getMessage(evt map[string]interface{}, key string) string {
+	msg := getStrVal(evt, key)
+	if msg != "" {
+		delete(evt, key)
+	}
+	return msg
+}
+
+func getStrVal(evt map[string]interface{}, key string) string {
+	raw, ok := evt[key]
+	if !ok {
+		return ""
+	}
+	str, ok := raw.(string)
+	if !ok {
+		return ""
+	}
+	return str
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/multipas/provisioner.go b/dev-tools/mage/target/srvrlesstest/testing/multipas/provisioner.go
new file mode 100644
index 000000000000..d0718d07833f
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/multipas/provisioner.go
@@ -0,0 +1,330 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package multipass + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/core/process" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/common" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/runner" + "os" + "os/exec" + "path/filepath" + "runtime" + "time" + + "gopkg.in/yaml.v2" +) + +const ( + Ubuntu = "ubuntu" + Name = "multipass" +) + +type provisioner struct { + logger common.Logger +} + +// NewProvisioner creates the multipass provisioner +func NewProvisioner() common.InstanceProvisioner { + return &provisioner{} +} + +func (p *provisioner) Name() string { + return Name +} + +func (p *provisioner) SetLogger(l common.Logger) { + p.logger = l +} + +func (p *provisioner) Type() common.ProvisionerType { + return common.ProvisionerTypeVM +} + +// Supported returns true if multipass supports this OS. +// +// multipass only supports Ubuntu on the same architecture as the running host. +func (p *provisioner) Supported(os define.OS) bool { + if os.Type != define.Linux { + return false + } + if os.Distro != Ubuntu { + return false + } + if os.Version != "20.04" && os.Version != "22.04" && os.Version != "24.04" { + return false + } + // multipass only supports the same architecture of the host + if os.Arch != runtime.GOARCH { + return false + } + return true +} + +func (p *provisioner) Provision(ctx context.Context, cfg common.Config, batches []common.OSBatch) ([]common.Instance, error) { + // this doesn't provision the instances in parallel on purpose + // multipass cannot handle it, it either results in instances sharing the same IP address + // or some instances stuck in Starting state + for _, batch := range batches { + err := func(batch common.OSBatch) error { + launchCtx, launchCancel := context.WithTimeout(ctx, 5*time.Minute) + defer launchCancel() + err := p.launch(launchCtx, cfg, batch) + if err != nil { + return fmt.Errorf("instance %s failed: %w", batch.ID, err) + } + return nil + }(batch) + if err != nil { + return nil, err + } + } + + var results []common.Instance + instances, err := p.list(ctx) + if err != nil { + return nil, err + } + for _, batch := range batches { + mi, ok := instances[batch.ID] + if !ok { + return nil, fmt.Errorf("failed to find %s in multipass list output", batch.ID) + } + if mi.State != "Running" { + return nil, fmt.Errorf("instance %s is not marked as running", batch.ID) + } + results = append(results, common.Instance{ + ID: batch.ID, + Provisioner: Name, + Name: batch.ID, + IP: mi.IPv4[0], + Username: "ubuntu", + RemotePath: "/home/ubuntu/agent", + Internal: nil, + }) + } + return results, nil +} + +// Clean cleans up all provisioned resources. 
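+// Deletion happens sequentially because multipass cannot reliably handle
+// concurrent operations; individual failures are logged and do not stop
+// the remaining deletions.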
+func (p *provisioner) Clean(ctx context.Context, _ common.Config, instances []common.Instance) error {
+	// doesn't execute in parallel for the same reason as Provision:
+	// multipass just cannot handle it
+	for _, instance := range instances {
+		func(instance common.Instance) {
+			deleteCtx, deleteCancel := context.WithTimeout(ctx, 5*time.Minute)
+			defer deleteCancel()
+			err := p.delete(deleteCtx, instance)
+			if err != nil {
+				// prevent a failure from stopping the other instances and clean
+				p.logger.Logf("Delete instance %s failed: %s", instance.Name, err)
+			}
+		}(instance)
+	}
+	return nil
+}
+
+// launch creates an instance.
+func (p *provisioner) launch(ctx context.Context, cfg common.Config, batch common.OSBatch) error {
+	// check if instance already exists
+	err := p.ensureInstanceNotExist(ctx, batch)
+	if err != nil {
+		p.logger.Logf(
+			"could not check that multipass instance %q does not exist, moving on anyway. Err: %v", batch.ID, err)
+	}
+	args := []string{
+		"launch",
+		"-c", "2",
+		"-d", "50G", // need decent size for all the tests
+		"-m", "4G",
+		"-n", batch.ID,
+		"--cloud-init", "-",
+		batch.OS.Version,
+	}
+
+	publicKeyPath := filepath.Join(cfg.StateDir, "id_rsa.pub")
+	publicKey, err := os.ReadFile(publicKeyPath)
+	if err != nil {
+		return fmt.Errorf("failed to read SSH key to send to multipass instance at %s: %w", publicKeyPath, err)
+	}
+
+	var cloudCfg cloudinitConfig
+	cloudCfg.SSHAuthorizedKeys = []string{string(publicKey)}
+	cloudCfgData, err := yaml.Marshal(&cloudCfg)
+	if err != nil {
+		return fmt.Errorf("failed to marshal cloud-init configuration: %w", err)
+	}
+
+	var output bytes.Buffer
+	p.logger.Logf("Launching multipass image %s", batch.ID)
+	proc, err := process.Start("multipass", process.WithContext(ctx), process.WithArgs(args), process.WithCmdOptions(runner.AttachOut(&output), runner.AttachErr(&output)))
+	if err != nil {
+		return fmt.Errorf("failed to run multipass launch: %w", err)
+	}
+	_, err = proc.Stdin.Write([]byte(fmt.Sprintf("#cloud-config\n%s", cloudCfgData)))
+	if err != nil {
+		_ = proc.Stdin.Close()
+		_ = proc.Kill()
+		<-proc.Wait()
+		// print the output so it's clear what went wrong
+		fmt.Fprintf(os.Stdout, "%s\n", output.Bytes())
+		return fmt.Errorf("failed to write cloudinit to stdin: %w", err)
+	}
+	_ = proc.Stdin.Close()
+	ps := <-proc.Wait()
+	if !ps.Success() {
+		// print the output so it's clear what went wrong
+		fmt.Fprintf(os.Stdout, "%s\n", output.Bytes())
+		return fmt.Errorf("failed to run multipass launch: exited with code: %d", ps.ExitCode())
+	}
+	return nil
+}
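+
+// The cloud-init document written to the multipass stdin above has this
+// shape (key material shortened for illustration):
+//
+//	#cloud-config
+//	ssh_authorized_keys:
+//	- ssh-rsa AAAA... user@host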
fmt.Errorf("could not decode mutipass list output: %w", err) + } + + for _, i := range list.List { + if i.Name == batch.ID { + p.logger.Logf("multipass trying to delete instance %s", batch.ID) + + output.Reset() + stdErr.Reset() + proc, err = process.Start("multipass", + process.WithContext(ctx), + process.WithArgs([]string{"delete", "--purge", batch.ID}), + process.WithCmdOptions( + runner.AttachOut(&output), + runner.AttachErr(&stdErr))) + if err != nil { + return fmt.Errorf( + "multipass instance %q already exist, state %q. Could not delete it: %w", + batch.ID, i.State, err) + } + state = <-proc.Wait() + if !state.Success() { + msg := fmt.Sprintf("failed to delete and purge multipass instance %s: %s", + batch.ID, + state.String()) + p.logger.Logf(msg) + p.logger.Logf("output: %s", output.String()) + p.logger.Logf("stderr: %s", stdErr.String()) + return errors.New(msg) + } + + break + } + } + + return nil +} + +// delete deletes an instance. +func (p *provisioner) delete(ctx context.Context, instance common.Instance) error { + args := []string{ + "delete", + "-p", + instance.ID, + } + + var output bytes.Buffer + p.logger.Logf("Deleting instance %s", instance.Name) + proc, err := process.Start("multipass", process.WithContext(ctx), process.WithArgs(args), process.WithCmdOptions(runner.AttachOut(&output), runner.AttachErr(&output))) + if err != nil { + // print the output so its clear what went wrong + fmt.Fprintf(os.Stdout, "%s\n", output.Bytes()) + return fmt.Errorf("failed to run multipass delete: %w", err) + } + ps := <-proc.Wait() + if ps.ExitCode() != 0 { + // print the output so its clear what went wrong + fmt.Fprintf(os.Stdout, "%s\n", output.Bytes()) + return fmt.Errorf("failed to run multipass delete: exited with code: %d", ps.ExitCode()) + } + return nil +} + +// list all the instances. +func (p *provisioner) list(ctx context.Context) (map[string]instance, error) { + cmd := exec.CommandContext(ctx, "multipass", "list", "--format", "yaml") + result, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("failed to run multipass list: %w", err) + } + + // yaml output from multipass gives a list of instances for each instance name, + // even though there is only ever 1 entry in the list + var instancesMulti map[string][]instance + err = yaml.Unmarshal(result, &instancesMulti) + if err != nil { + return nil, fmt.Errorf("failed to parse multipass list output: %w", err) + } + instances := map[string]instance{} + for name, multi := range instancesMulti { + instances[name] = multi[0] + } + + return instances, nil +} + +type instance struct { + State string `yaml:"state"` + IPv4 []string `yaml:"ipv4"` + Release string `yaml:"release"` +} + +type cloudinitConfig struct { + SSHAuthorizedKeys []string `yaml:"ssh_authorized_keys"` +} diff --git a/dev-tools/mage/target/srvrlesstest/testing/ogc/api.go b/dev-tools/mage/target/srvrlesstest/testing/ogc/api.go new file mode 100644 index 000000000000..ec9228118b74 --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/testing/ogc/api.go @@ -0,0 +1,60 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package ogc + +import "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define" + +// Layout definition for `ogc layout import`. +type Layout struct { + Name string `yaml:"name"` + Provider string `yaml:"provider"` + InstanceSize string `yaml:"instance_size"` + RunsOn string `yaml:"runs_on"` + RemotePath string `yaml:"remote_path"` + Scale int `yaml:"scale"` + Username string `yaml:"username"` + SSHPrivateKey string `yaml:"ssh_private_key"` + SSHPublicKey string `yaml:"ssh_public_key"` + Ports []string `yaml:"ports"` + Tags []string `yaml:"tags"` + Labels map[string]string `yaml:"labels"` + Scripts string `yaml:"scripts"` +} + +// Machine definition returned by `ogc up`. +type Machine struct { + ID int `yaml:"id"` + InstanceID string `yaml:"instance_id"` + InstanceName string `yaml:"instance_name"` + InstanceState string `yaml:"instance_state"` + PrivateIP string `yaml:"private_ip"` + PublicIP string `yaml:"public_ip"` + Layout Layout `yaml:"layout"` + Create string `yaml:"created"` +} + +// LayoutOS defines the minimal information for a mapping of an OS to the +// provider, instance size, and runs on for that OS. +type LayoutOS struct { + OS define.OS + Provider string + InstanceSize string + RunsOn string + Username string + RemotePath string +} diff --git a/dev-tools/mage/target/srvrlesstest/testing/ogc/config.go b/dev-tools/mage/target/srvrlesstest/testing/ogc/config.go new file mode 100644 index 000000000000..7a65e856c706 --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/testing/ogc/config.go @@ -0,0 +1,100 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package ogc + +import ( + "encoding/json" + "errors" + "fmt" + "os" +) + +// Config is the configuration for using OGC. +type Config struct { + ServiceTokenPath string + Datacenter string + + content *serviceTokenContent +} + +// Validate returns an error if the information is invalid. +func (c *Config) Validate() error { + if c.ServiceTokenPath == "" { + return errors.New("field ServiceTokenPath must be set") + } + if c.Datacenter == "" { + return errors.New("field Datacenter must be set") + } + return c.ensureParsed() +} + +// ProjectID returns the project ID from the service token. 
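+// The token file is read and parsed lazily on the first call that needs it,
+// then cached on the Config.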
+func (c *Config) ProjectID() (string, error) {
+	err := c.ensureParsed()
+	if err != nil {
+		return "", err
+	}
+	return c.content.ProjectID, nil
+}
+
+// ClientEmail returns the client email from the service token.
+func (c *Config) ClientEmail() (string, error) {
+	err := c.ensureParsed()
+	if err != nil {
+		return "", err
+	}
+	return c.content.ClientEmail, nil
+}
+
+func (c *Config) ensureParsed() error {
+	if c.content != nil {
+		// already parsed
+		return nil
+	}
+	content, err := c.parse()
+	if err != nil {
+		return err
+	}
+	c.content = content
+	return nil
+}
+
+func (c *Config) parse() (*serviceTokenContent, error) {
+	var content serviceTokenContent
+	raw, err := os.ReadFile(c.ServiceTokenPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read contents of %s: %w", c.ServiceTokenPath, err)
+	}
+	err = json.Unmarshal(raw, &content)
+	if err != nil {
+		return nil, fmt.Errorf("failed to unmarshal JSON contents of %s: %w", c.ServiceTokenPath, err)
+	}
+	if content.Type != "service_account" {
+		return nil, fmt.Errorf("not a service account token at %s; type != service_account", c.ServiceTokenPath)
+	}
+	return &content, nil
+}
+
+// serviceTokenContent is parsed content from a service token file.
+type serviceTokenContent struct {
+	Type        string `json:"type"`
+	ProjectID   string `json:"project_id"`
+	ClientEmail string `json:"client_email"`
+
+	// more fields exist, but we only need the ones above
+}
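+
+// A minimal service token file that parse accepts (illustrative values):
+//
+//	{
+//	  "type": "service_account",
+//	  "project_id": "my-gcp-project",
+//	  "client_email": "ci-runner@my-gcp-project.iam.gserviceaccount.com"
+//	}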
diff --git a/dev-tools/mage/target/srvrlesstest/testing/ogc/provisioner.go b/dev-tools/mage/target/srvrlesstest/testing/ogc/provisioner.go
new file mode 100644
index 000000000000..eb05e9922d99
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/ogc/provisioner.go
@@ -0,0 +1,354 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package ogc
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/core/process"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/common"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/runner"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"gopkg.in/yaml.v2"
+)
+
+const (
+	// LayoutIntegrationTag is the tag added to all layouts for the integration testing framework.
+	LayoutIntegrationTag = "agent-integration"
+	Name                 = "ogc"
+)
+
+type provisioner struct {
+	logger common.Logger
+	cfg    Config
+}
+
+// NewProvisioner creates the OGC provisioner
+func NewProvisioner(cfg Config) (common.InstanceProvisioner, error) {
+	err := cfg.Validate()
+	if err != nil {
+		return nil, err
+	}
+	return &provisioner{
+		cfg: cfg,
+	}, nil
+}
+
+func (p *provisioner) Name() string {
+	return Name
+}
+
+func (p *provisioner) SetLogger(l common.Logger) {
+	p.logger = l
+}
+
+func (p *provisioner) Type() common.ProvisionerType {
+	return common.ProvisionerTypeVM
+}
+
+// Supported returns true when we support this OS for OGC.
+func (p *provisioner) Supported(os define.OS) bool {
+	_, ok := findOSLayout(os)
+	return ok
+}
+
+func (p *provisioner) Provision(ctx context.Context, cfg common.Config, batches []common.OSBatch) ([]common.Instance, error) {
+	// ensure the pinned ogc image is available
+	pullCtx, pullCancel := context.WithTimeout(ctx, 5*time.Minute)
+	defer pullCancel()
+	err := p.ogcPull(pullCtx)
+	if err != nil {
+		return nil, err
+	}
+
+	// import the calculated layouts
+	importCtx, importCancel := context.WithTimeout(ctx, 30*time.Second)
+	defer importCancel()
+	err = p.ogcImport(importCtx, cfg, batches)
+	if err != nil {
+		return nil, err
+	}
+
+	// bring up all the instances
+	upCtx, upCancel := context.WithTimeout(ctx, 30*time.Minute)
+	defer upCancel()
+	upOutput, err := p.ogcUp(upCtx)
+	if err != nil {
+		return nil, fmt.Errorf("ogc up failed: %w", err)
+	}
+
+	// fetch the machines and run the batches on the machine
+	machines, err := p.ogcMachines(ctx)
+	if err != nil {
+		return nil, err
+	}
+	if len(machines) == 0 {
+		// Print the output so it's clear what went wrong.
+		// Without this it's unclear where OGC went wrong, it
+		// doesn't do a great job of reporting a clean error
+		fmt.Fprintf(os.Stdout, "%s\n", upOutput)
+		return nil, fmt.Errorf("ogc didn't create any machines")
+	}
+
+	// map the machines to instances
+	var instances []common.Instance
+	for _, b := range batches {
+		machine, ok := findMachine(machines, b.ID)
+		if !ok {
+			// Print the output so it's clear what went wrong.
+			// Without this it's unclear where OGC went wrong, it
+			// doesn't do a great job of reporting a clean error
+			fmt.Fprintf(os.Stdout, "%s\n", upOutput)
+			return nil, fmt.Errorf("failed to find machine for batch ID: %s", b.ID)
+		}
+		instances = append(instances, common.Instance{
+			ID:          b.ID,
+			Provisioner: Name,
+			Name:        machine.InstanceName,
+			IP:          machine.PublicIP,
+			Username:    machine.Layout.Username,
+			RemotePath:  machine.Layout.RemotePath,
+			Internal: map[string]interface{}{
+				"instance_id": machine.InstanceID,
+			},
+		})
+	}
+	return instances, nil
+}
+
+// Clean cleans up all provisioned resources.
+func (p *provisioner) Clean(ctx context.Context, cfg common.Config, _ []common.Instance) error {
+	return p.ogcDown(ctx)
+}
+
+// ogcPull pulls the pinned ogc container image.
+func (p *provisioner) ogcPull(ctx context.Context) error { + args := []string{ + "pull", + "docker.elastic.co/observability-ci/ogc:5.0.1", + } + var output bytes.Buffer + p.logger.Logf("Pulling latest ogc image") + proc, err := process.Start("docker", process.WithContext(ctx), process.WithArgs(args), process.WithCmdOptions(runner.AttachOut(&output), runner.AttachErr(&output))) + if err != nil { + return fmt.Errorf("failed to run docker ogcPull: %w", err) + } + ps := <-proc.Wait() + if ps.ExitCode() != 0 { + // print the output so its clear what went wrong + fmt.Fprintf(os.Stdout, "%s\n", output.Bytes()) + return fmt.Errorf("failed to run ogc pull: docker run exited with code: %d", ps.ExitCode()) + } + return nil +} + +// ogcImport imports all the required batches into OGC. +func (p *provisioner) ogcImport(ctx context.Context, cfg common.Config, batches []common.OSBatch) error { + var layouts []Layout + for _, ob := range batches { + layouts = append(layouts, osBatchToOGC(cfg.StateDir, ob)) + } + layoutData, err := yaml.Marshal(struct { + Layouts []Layout `yaml:"layouts"` + }{ + Layouts: layouts, + }) + if err != nil { + return fmt.Errorf("failed to marshal layouts YAML: %w", err) + } + + var output bytes.Buffer + p.logger.Logf("Import layouts into ogc") + proc, err := p.ogcRun(ctx, []string{"layout", "import"}, true, process.WithCmdOptions(runner.AttachOut(&output), runner.AttachErr(&output))) + if err != nil { + return fmt.Errorf("failed to run ogc import: %w", err) + } + _, err = proc.Stdin.Write(layoutData) + if err != nil { + _ = proc.Stdin.Close() + _ = proc.Kill() + <-proc.Wait() + // print the output so its clear what went wrong + fmt.Fprintf(os.Stdout, "%s\n", output.Bytes()) + return fmt.Errorf("failed to write layouts to stdin: %w", err) + } + _ = proc.Stdin.Close() + ps := <-proc.Wait() + if ps.ExitCode() != 0 { + // print the output so its clear what went wrong + fmt.Fprintf(os.Stdout, "%s\n", output.Bytes()) + return fmt.Errorf("failed to run ogc import: docker run exited with code: %d", ps.ExitCode()) + } + return nil +} + +// ogcUp brings up all the instances. +func (p *provisioner) ogcUp(ctx context.Context) ([]byte, error) { + p.logger.Logf("Bring up instances through ogc") + var output bytes.Buffer + proc, err := p.ogcRun(ctx, []string{"up", LayoutIntegrationTag}, false, process.WithCmdOptions(runner.AttachOut(&output), runner.AttachErr(&output))) + if err != nil { + return nil, fmt.Errorf("failed to run ogc up: %w", err) + } + ps := <-proc.Wait() + if ps.ExitCode() != 0 { + // print the output so its clear what went wrong + fmt.Fprintf(os.Stdout, "%s\n", output.Bytes()) + return nil, fmt.Errorf("failed to run ogc up: docker run exited with code: %d", ps.ExitCode()) + } + return output.Bytes(), nil +} + +// ogcDown brings down all the instances. +func (p *provisioner) ogcDown(ctx context.Context) error { + p.logger.Logf("Bring down instances through ogc") + var output bytes.Buffer + proc, err := p.ogcRun(ctx, []string{"down", LayoutIntegrationTag}, false, process.WithCmdOptions(runner.AttachOut(&output), runner.AttachErr(&output))) + if err != nil { + return fmt.Errorf("failed to run ogc down: %w", err) + } + ps := <-proc.Wait() + if ps.ExitCode() != 0 { + // print the output so its clear what went wrong + fmt.Fprintf(os.Stdout, "%s\n", output.Bytes()) + return fmt.Errorf("failed to run ogc down: docker run exited with code: %d", ps.ExitCode()) + } + return nil +} + +// ogcMachines lists all the instances. 
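+// It runs `ogc ls --as-yaml` inside the container and decodes the YAML
+// list of machines.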
+func (p *provisioner) ogcMachines(ctx context.Context) ([]Machine, error) { + var out bytes.Buffer + proc, err := p.ogcRun(ctx, []string{"ls", "--as-yaml"}, false, process.WithCmdOptions(runner.AttachOut(&out))) + if err != nil { + return nil, fmt.Errorf("failed to run ogc ls: %w", err) + } + ps := <-proc.Wait() + if ps.ExitCode() != 0 { + return nil, fmt.Errorf("failed to run ogc ls: docker run exited with code: %d", ps.ExitCode()) + } + var machines []Machine + err = yaml.Unmarshal(out.Bytes(), &machines) + if err != nil { + return nil, fmt.Errorf("failed to parse ogc ls output: %w", err) + } + return machines, nil +} + +func (p *provisioner) ogcRun(ctx context.Context, args []string, interactive bool, processOpts ...process.StartOption) (*process.Info, error) { + wd, err := runner.WorkDir() + if err != nil { + return nil, err + } + tokenName := filepath.Base(p.cfg.ServiceTokenPath) + clientEmail, err := p.cfg.ClientEmail() + if err != nil { + return nil, err + } + projectID, err := p.cfg.ProjectID() + if err != nil { + return nil, err + } + runArgs := []string{"run"} + if interactive { + runArgs = append(runArgs, "-i") + } + runArgs = append(runArgs, + "--rm", + "-e", + fmt.Sprintf("GOOGLE_APPLICATION_SERVICE_ACCOUNT=%s", clientEmail), + "-e", + fmt.Sprintf("GOOGLE_APPLICATION_CREDENTIALS=/root/%s", tokenName), + "-e", + fmt.Sprintf("GOOGLE_PROJECT=%s", projectID), + "-e", + fmt.Sprintf("GOOGLE_DATACENTER=%s", p.cfg.Datacenter), + "-v", + fmt.Sprintf("%s:/root/%s", p.cfg.ServiceTokenPath, tokenName), + "-v", + fmt.Sprintf("%s:%s", wd, wd), + "-w", + wd, + "docker.elastic.co/observability-ci/ogc:5.0.1", + "--", + "ogc", + "-v", + ) + runArgs = append(runArgs, args...) + opts := []process.StartOption{process.WithContext(ctx), process.WithArgs(runArgs)} + opts = append(opts, processOpts...) + return process.Start("docker", opts...) +} + +func osBatchToOGC(cacheDir string, batch common.OSBatch) Layout { + tags := []string{ + LayoutIntegrationTag, + batch.OS.Type, + batch.OS.Arch, + } + if batch.OS.Type == define.Linux { + tags = append(tags, strings.ToLower(fmt.Sprintf("%s-%s", batch.OS.Distro, strings.Replace(batch.OS.Version, ".", "-", -1)))) + } else { + tags = append(tags, strings.ToLower(fmt.Sprintf("%s-%s", batch.OS.Type, strings.Replace(batch.OS.Version, ".", "-", -1)))) + } + los, _ := findOSLayout(batch.OS.OS) + return Layout{ + Name: batch.ID, + Provider: los.Provider, + InstanceSize: los.InstanceSize, + RunsOn: los.RunsOn, + RemotePath: los.RemotePath, + Scale: 1, + Username: los.Username, + SSHPrivateKey: cacheDir + "/id_rsa", + SSHPublicKey: cacheDir + "/id_rsa.pub", + Ports: []string{"22:22"}, + Tags: tags, + Labels: map[string]string{ + "division": "engineering", + "org": "ingest", + "team": "elastic-agent-control-plane", + "project": "elastic-agent", + }, + Scripts: "path", // not used; but required by OGC + } +} + +func findOSLayout(os define.OS) (LayoutOS, bool) { + for _, s := range ogcSupported { + if s.OS == os { + return s, true + } + } + return LayoutOS{}, false +} + +func findMachine(machines []Machine, name string) (Machine, bool) { + for _, m := range machines { + if m.Layout.Name == name { + return m, true + } + } + return Machine{}, false +} diff --git a/dev-tools/mage/target/srvrlesstest/testing/ogc/supported.go b/dev-tools/mage/target/srvrlesstest/testing/ogc/supported.go new file mode 100644 index 000000000000..b3dcb1e77cf7 --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/testing/ogc/supported.go @@ -0,0 +1,202 @@ +// Licensed to Elasticsearch B.V. 
under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package ogc + +import ( + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/supported" +) + +const ( + // Google is for the Google Cloud Platform (GCP) + Google = "google" +) + +// ogcSupported defines the set of supported OS's the OGC provisioner currently supports. +// +// In the case that a batch is not specific on the version and/or distro the first +// one in this list will be picked. So it's best to place the one that we want the +// most testing at the top. +var ogcSupported = []LayoutOS{ + { + OS: define.OS{ + Type: define.Linux, + Arch: define.AMD64, + Distro: supported.Ubuntu, + Version: "24.04", + }, + Provider: Google, + InstanceSize: "e2-standard-2", // 2 amd64 cpus, 8 GB RAM + RunsOn: "ubuntu-2404-lts-amd64", + Username: "ubuntu", + RemotePath: "/home/ubuntu/agent", + }, + { + OS: define.OS{ + Type: define.Linux, + Arch: define.AMD64, + Distro: supported.Ubuntu, + Version: "22.04", + }, + Provider: Google, + InstanceSize: "e2-standard-2", // 2 amd64 cpus, 8 GB RAM + RunsOn: "ubuntu-2204-lts", + Username: "ubuntu", + RemotePath: "/home/ubuntu/agent", + }, + { + OS: define.OS{ + Type: define.Linux, + Arch: define.AMD64, + Distro: supported.Ubuntu, + Version: "20.04", + }, + Provider: Google, + InstanceSize: "e2-standard-2", // 2 amd64 cpus, 8 GB RAM + RunsOn: "ubuntu-2004-lts", + Username: "ubuntu", + RemotePath: "/home/ubuntu/agent", + }, + // These instance types are experimental on Google Cloud and very unstable + // We will wait until Google introduces new ARM instance types + // https://cloud.google.com/blog/products/compute/introducing-googles-new-arm-based-cpu + // { + // OS: define.OS{ + // Type: define.Linux, + // Arch: define.ARM64, + // Distro: runner.Ubuntu, + // Version: "24.04", + // }, + // Provider: Google, + // InstanceSize: "t2a-standard-4", // 4 arm64 cpus, 16 GB RAM + // RunsOn: "ubuntu-2404-lts-arm64", + // Username: "ubuntu", + // RemotePath: "/home/ubuntu/agent", + // }, + // { + // OS: define.OS{ + // Type: define.Linux, + // Arch: define.ARM64, + // Distro: runner.Ubuntu, + // Version: "22.04", + // }, + // Provider: Google, + // InstanceSize: "t2a-standard-4", // 4 arm64 cpus, 16 GB RAM + // RunsOn: "ubuntu-2204-lts-arm64", + // Username: "ubuntu", + // RemotePath: "/home/ubuntu/agent", + // }, + // { + // OS: define.OS{ + // Type: define.Linux, + // Arch: define.ARM64, + // Distro: runner.Ubuntu, + // Version: "20.04", + // }, + // Provider: Google, + // InstanceSize: "t2a-standard-4", // 4 arm64 cpus, 16 GB RAM + // RunsOn: "ubuntu-2004-lts-arm64", + // Username: "ubuntu", + // RemotePath: "/home/ubuntu/agent", + // }, + { + OS: define.OS{ + Type: define.Linux, + Arch: define.AMD64, + 
Distro: supported.Rhel, + Version: "8", + }, + Provider: Google, + InstanceSize: "e2-standard-2", // 2 amd64 cpus, 8 GB RAM + RunsOn: "rhel-8", + Username: "rhel", + RemotePath: "/home/rhel/agent", + }, + { + OS: define.OS{ + Type: define.Windows, + Arch: define.AMD64, + Version: "2022", + }, + Provider: Google, + InstanceSize: "e2-standard-4", // 4 amd64 cpus, 16 GB RAM + RunsOn: "windows-2022", + Username: "windows", + RemotePath: "C:\\Users\\windows\\agent", + }, + { + OS: define.OS{ + Type: define.Windows, + Arch: define.AMD64, + Version: "2022-core", + }, + Provider: Google, + InstanceSize: "e2-standard-4", // 4 amd64 cpus, 16 GB RAM + RunsOn: "windows-2022-core", + Username: "windows", + RemotePath: "C:\\Users\\windows\\agent", + }, + { + OS: define.OS{ + Type: define.Windows, + Arch: define.AMD64, + Version: "2019", + }, + Provider: Google, + InstanceSize: "e2-standard-4", // 4 amd64 cpus, 16 GB RAM + RunsOn: "windows-2019", + Username: "windows", + RemotePath: "C:\\Users\\windows\\agent", + }, + { + OS: define.OS{ + Type: define.Windows, + Arch: define.AMD64, + Version: "2019-core", + }, + Provider: Google, + InstanceSize: "e2-standard-4", // 4 amd64 cpus, 16 GB RAM + RunsOn: "windows-2019-core", + Username: "windows", + RemotePath: "C:\\Users\\windows\\agent", + }, + { + OS: define.OS{ + Type: define.Windows, + Arch: define.AMD64, + Version: "2016", + }, + Provider: Google, + InstanceSize: "e2-standard-4", // 4 amd64 cpus, 16 GB RAM + RunsOn: "windows-2016", + Username: "windows", + RemotePath: "C:\\Users\\windows\\agent", + }, + { + OS: define.OS{ + Type: define.Windows, + Arch: define.AMD64, + Version: "2016-core", + }, + Provider: Google, + InstanceSize: "e2-standard-4", // 4 amd64 cpus, 16 GB RAM + RunsOn: "windows-2016-core", + Username: "windows", + RemotePath: "C:\\Users\\windows\\agent", + }, +} diff --git a/dev-tools/mage/target/srvrlesstest/testing/runner/archiver.go b/dev-tools/mage/target/srvrlesstest/testing/runner/archiver.go new file mode 100644 index 000000000000..c0b668bed4ff --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/testing/runner/archiver.go @@ -0,0 +1,125 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package runner + +import ( + "archive/zip" + "bufio" + "bytes" + "context" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "strings" +) + +func createRepoZipArchive(ctx context.Context, dir string, dest string) error { + absDir, err := filepath.Abs(dir) + if err != nil { + return fmt.Errorf("failed to get absolute path to %s: %w", dir, err) + } + + projectFilesOutput, err := cmdBufferedOutput(exec.Command("git", "ls-files", "-z"), dir) + if err != nil { + return err + } + + // Add files that are not yet tracked in git. 
Prevents a footcannon where someone writes code to a new file, then tests it before they add to git + untrackedOutput, err := cmdBufferedOutput(exec.Command("git", "ls-files", "--exclude-standard", "-o", "-z"), dir) + if err != nil { + return err + } + + _, err = io.Copy(&projectFilesOutput, &untrackedOutput) + if err != nil { + return fmt.Errorf("failed to read stdout of git ls-files -o: %w", err) + } + + archive, err := os.Create(dest) + if err != nil { + return fmt.Errorf("failed to create file %s: %w", dest, err) + } + defer archive.Close() + + zw := zip.NewWriter(archive) + defer zw.Close() + + s := bufio.NewScanner(&projectFilesOutput) + s.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) { + if i := strings.IndexRune(string(data), '\x00'); i >= 0 { + return i + 1, data[0:i], nil + } + if !atEOF { + return 0, nil, nil + } + return len(data), data, bufio.ErrFinalToken + }) + for s.Scan() { + if ctx.Err() != nil { + // incomplete close and delete + _ = archive.Close() + _ = os.Remove(dest) + return ctx.Err() + } + err := func(line string) error { + if line == "" { + return nil + } + fullPath := filepath.Join(absDir, line) + s, err := os.Stat(fullPath) + if err != nil { + return fmt.Errorf("failed to stat file %s: %w", fullPath, err) + } + if s.IsDir() { + // skip directories + return nil + } + f, err := os.Open(fullPath) + if err != nil { + return fmt.Errorf("failed to open file %s: %w", fullPath, err) + } + defer f.Close() + w, err := zw.Create(line) + if err != nil { + return fmt.Errorf("failed to create zip entry %s: %w", line, err) + } + _, err = io.Copy(w, f) + if err != nil { + return fmt.Errorf("failed to copy zip entry %s: %w", line, err) + } + return nil + }(s.Text()) + if err != nil { + return fmt.Errorf("error adding files: %w", err) + } + } + return nil +} + +func cmdBufferedOutput(cmd *exec.Cmd, workDir string) (bytes.Buffer, error) { + var stdoutBuf bytes.Buffer + cmd.Dir = workDir + cmd.Stdout = &stdoutBuf + err := cmd.Run() + if err != nil { + return *bytes.NewBufferString(""), fmt.Errorf("failed to run cmd %s: %w", strings.Join(cmd.Args, " "), err) + } + return stdoutBuf, nil +} diff --git a/dev-tools/mage/target/srvrlesstest/testing/runner/json.go b/dev-tools/mage/target/srvrlesstest/testing/runner/json.go new file mode 100644 index 000000000000..8e044a0ac0c1 --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/testing/runner/json.go @@ -0,0 +1,60 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package runner + +import ( + "bufio" + "bytes" + "encoding/json" +) + +type JSONTestEntry struct { + Time string `json:"Time"` + Action string `json:"Action"` + Package string `json:"Package"` + Test string `json:"Test"` + Output string `json:"Output"` +} + +func suffixJSONResults(content []byte, suffix string) ([]byte, error) { + var result bytes.Buffer + sc := bufio.NewScanner(bytes.NewReader(content)) + for sc.Scan() { + var entry JSONTestEntry + err := json.Unmarshal([]byte(sc.Text()), &entry) + if err != nil { + return nil, err + } + if entry.Package != "" { + entry.Package += suffix + } + raw, err := json.Marshal(&entry) + if err != nil { + return nil, err + } + _, err = result.Write(raw) + if err != nil { + return nil, err + } + _, err = result.Write([]byte("\n")) + if err != nil { + return nil, err + } + } + return result.Bytes(), nil +} diff --git a/dev-tools/mage/target/srvrlesstest/testing/runner/junit.go b/dev-tools/mage/target/srvrlesstest/testing/runner/junit.go new file mode 100644 index 000000000000..3ecb3721210f --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/testing/runner/junit.go @@ -0,0 +1,99 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package runner + +import ( + "encoding/xml" + "io" +) + +// JUnitTestSuites is a collection of JUnit test suites. +type JUnitTestSuites struct { + XMLName xml.Name `xml:"testsuites"` + Name string `xml:"name,attr,omitempty"` + Tests int `xml:"tests,attr,omitempty"` + Failures int `xml:"failures,attr,omitempty"` + Errors int `xml:"errors,attr,omitempty"` + Time string `xml:"time,attr,omitempty"` + Suites []JUnitTestSuite `xml:"testsuite"` +} + +// JUnitTestSuite is a single JUnit test suite which may contain many +// testcases. +type JUnitTestSuite struct { + XMLName xml.Name `xml:"testsuite"` + Tests int `xml:"tests,attr"` + Failures int `xml:"failures,attr"` + Time string `xml:"time,attr"` + Name string `xml:"name,attr"` + Properties []JUnitProperty `xml:"properties>property,omitempty"` + TestCases []JUnitTestCase `xml:"testcase"` + Timestamp string `xml:"timestamp,attr"` +} + +// JUnitTestCase is a single test case with its result. +type JUnitTestCase struct { + XMLName xml.Name `xml:"testcase"` + Classname string `xml:"classname,attr"` + Name string `xml:"name,attr"` + Time string `xml:"time,attr"` + SkipMessage *JUnitSkipMessage `xml:"skipped,omitempty"` + Failure *JUnitFailure `xml:"failure,omitempty"` +} + +// JUnitSkipMessage contains the reason why a testcase was skipped. +type JUnitSkipMessage struct { + Message string `xml:"message,attr"` +} + +// JUnitProperty represents a key/value pair used to define properties. 
+type JUnitProperty struct { + Name string `xml:"name,attr"` + Value string `xml:"value,attr"` +} + +// JUnitFailure contains data related to a failed test. +type JUnitFailure struct { + Message string `xml:"message,attr"` + Type string `xml:"type,attr"` + Contents string `xml:",chardata"` +} + +// parseJUnit parses contents into a JUnit structure. +func parseJUnit(contents []byte) (JUnitTestSuites, error) { + var suites JUnitTestSuites + err := xml.Unmarshal(contents, &suites) + if err != nil { + return JUnitTestSuites{}, err + } + return suites, nil +} + +// writeJUnit writes the suites to the out writer. +func writeJUnit(out io.Writer, suites JUnitTestSuites) error { + doc, err := xml.MarshalIndent(suites, "", "\t") + if err != nil { + return err + } + _, err = out.Write([]byte(xml.Header)) + if err != nil { + return err + } + _, err = out.Write(doc) + return err +} diff --git a/dev-tools/mage/target/srvrlesstest/testing/runner/runner.go b/dev-tools/mage/target/srvrlesstest/testing/runner/runner.go new file mode 100644 index 000000000000..832f1b6a2172 --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/testing/runner/runner.go @@ -0,0 +1,968 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package runner + +import ( + "bytes" + "context" + "errors" + "fmt" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing" + "io" + "os" + "path/filepath" + "slices" + "strings" + "sync" + "time" + + "golang.org/x/crypto/ssh" + "golang.org/x/sync/errgroup" + "gopkg.in/yaml.v2" + + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/common" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define" + tssh "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/ssh" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/supported" +) + +// Result is the complete result from the runner. +type Result struct { + // Tests is the number of tests ran. + Tests int + // Failures is the number of tests that failed. + Failures int + // Output is the raw test output. + Output []byte + // XMLOutput is the XML Junit output. + XMLOutput []byte + // JSONOutput is the JSON output. + JSONOutput []byte +} + +// State represents the state storage of what has been provisioned. +type State struct { + // Instances stores provisioned and prepared instances. + Instances []StateInstance `yaml:"instances"` + + // Stacks store provisioned stacks. + Stacks []common.Stack `yaml:"stacks"` +} + +// StateInstance is an instance stored in the state. +type StateInstance struct { + common.Instance + + // Prepared set to true when the instance is prepared. + Prepared bool `yaml:"prepared"` +} + +// Runner runs the tests on remote instances. 
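+// It provisions the needed instances (and stacks, when a batch requires one),
+// executes each OS batch, and merges the per-batch results into a single Result.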
+type Runner struct { + cfg common.Config + logger common.Logger + ip common.InstanceProvisioner + sp common.StackProvisioner + + batches []common.OSBatch + + batchToStack map[string]stackRes + batchToStackCh map[string]chan stackRes + batchToStackMx sync.Mutex + + stateMx sync.Mutex + state State +} + +// NewRunner creates a new runner based on the provided batches. +func NewRunner(cfg common.Config, ip common.InstanceProvisioner, sp common.StackProvisioner, batches ...define.Batch) (*Runner, error) { + err := cfg.Validate() + if err != nil { + return nil, err + } + platforms, err := cfg.GetPlatforms() + if err != nil { + return nil, err + } + + osBatches, err := supported.CreateBatches(batches, platforms, cfg.Groups, cfg.Matrix, cfg.SingleTest) + if err != nil { + return nil, err + } + osBatches = filterSupportedOS(osBatches, ip) + + logger := &runnerLogger{ + writer: os.Stdout, + timestamp: cfg.Timestamp, + } + ip.SetLogger(logger) + sp.SetLogger(logger) + + r := &Runner{ + cfg: cfg, + logger: logger, + ip: ip, + sp: sp, + batches: osBatches, + batchToStack: make(map[string]stackRes), + batchToStackCh: make(map[string]chan stackRes), + } + + err = r.loadState() + if err != nil { + return nil, err + } + return r, nil +} + +// Logger returns the logger used by the runner. +func (r *Runner) Logger() common.Logger { + return r.logger +} + +// Run runs all the tests. +func (r *Runner) Run(ctx context.Context) (Result, error) { + // validate tests can even be performed + err := r.validate() + if err != nil { + return Result{}, err + } + + // prepare + prepareCtx, prepareCancel := context.WithTimeout(ctx, 10*time.Minute) + defer prepareCancel() + sshAuth, repoArchive, err := r.prepare(prepareCtx) + if err != nil { + return Result{}, err + } + + // start the needed stacks + err = r.startStacks(ctx) + if err != nil { + return Result{}, err + } + + // only send to the provisioner the batches that need to be created + var instances []StateInstance + var batches []common.OSBatch + for _, b := range r.batches { + if !b.Skip { + i, ok := r.findInstance(b.ID) + if ok { + instances = append(instances, i) + } else { + batches = append(batches, b) + } + } + } + if len(batches) > 0 { + provisionedInstances, err := r.ip.Provision(ctx, r.cfg, batches) + if err != nil { + return Result{}, err + } + for _, i := range provisionedInstances { + instances = append(instances, StateInstance{ + Instance: i, + Prepared: false, + }) + } + } + + var results map[string]common.OSRunnerResult + switch r.ip.Type() { + case common.ProvisionerTypeVM: + // use SSH to perform all the required work on the instances + results, err = r.runInstances(ctx, sshAuth, repoArchive, instances) + if err != nil { + return Result{}, err + } + case common.ProvisionerTypeK8SCluster: + results, err = r.runK8sInstances(ctx, instances) + if err != nil { + return Result{}, err + } + + default: + return Result{}, fmt.Errorf("invalid provisioner type %d", r.ip.Type()) + } + + // merge the results + return r.mergeResults(results) +} + +// Clean performs a cleanup to ensure anything that could have been left running is removed. 
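+//
+// The recorded state is cleared first, then instances and stacks are deleted
+// in parallel, each with its own 10 minute timeout. A minimal, hypothetical
+// usage sketch pairing it with Run:
+//
+//	r, err := NewRunner(cfg, instanceProvisioner, stackProvisioner, batches...)
+//	if err != nil {
+//		return err
+//	}
+//	defer func() { _ = r.Clean() }()
+//	result, err := r.Run(ctx)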
+func (r *Runner) Clean() error {
+	r.stateMx.Lock()
+	defer r.stateMx.Unlock()
+
+	var instances []common.Instance
+	for _, i := range r.state.Instances {
+		instances = append(instances, i.Instance)
+	}
+	r.state.Instances = nil
+	stacks := make([]common.Stack, len(r.state.Stacks))
+	copy(stacks, r.state.Stacks)
+	r.state.Stacks = nil
+	err := r.writeState()
+	if err != nil {
+		return err
+	}
+
+	var g errgroup.Group
+	g.Go(func() error {
+		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
+		defer cancel()
+		return r.ip.Clean(ctx, r.cfg, instances)
+	})
+	for _, stack := range stacks {
+		g.Go(func(stack common.Stack) func() error {
+			return func() error {
+				ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
+				defer cancel()
+				return r.sp.Delete(ctx, stack)
+			}
+		}(stack))
+	}
+	return g.Wait()
+}
+
+func (r *Runner) runK8sInstances(ctx context.Context, instances []StateInstance) (map[string]common.OSRunnerResult, error) {
+	results := make(map[string]common.OSRunnerResult)
+	var resultsMx sync.Mutex
+	var err error
+	for _, instance := range instances {
+		batch, ok := findBatchByID(instance.ID, r.batches)
+		if !ok {
+			err = fmt.Errorf("unable to find batch with ID: %s", instance.ID)
+			continue
+		}
+
+		logger := &batchLogger{wrapped: r.logger, prefix: instance.ID}
+		// start with the ExtraEnv first, preventing the other environment flags below
+		// from being overwritten
+		env := map[string]string{}
+		for k, v := range r.cfg.ExtraEnv {
+			env[k] = v
+		}
+
+		// ensure that we have all the requirements for the stack if required
+		if batch.Batch.Stack != nil {
+			// wait for the stack to be ready before continuing
+			logger.Logf("Waiting for stack to be ready...")
+			stack, stackErr := r.getStackForBatchID(batch.ID)
+			if stackErr != nil {
+				err = stackErr
+				continue
+			}
+			env["ELASTICSEARCH_HOST"] = stack.Elasticsearch
+			env["ELASTICSEARCH_USERNAME"] = stack.Username
+			env["ELASTICSEARCH_PASSWORD"] = stack.Password
+			env["KIBANA_HOST"] = stack.Kibana
+			env["KIBANA_USERNAME"] = stack.Username
+			env["KIBANA_PASSWORD"] = stack.Password
+			logger.Logf("Using Stack with Kibana host %s, credentials available under .integration-cache", stack.Kibana)
+		}
+
+		// set the go test flags
+		env["GOTEST_FLAGS"] = r.cfg.TestFlags
+		env["KUBECONFIG"] = instance.Instance.Internal["config"].(string)
+		env["TEST_BINARY_NAME"] = r.cfg.BinaryName
+		env["K8S_VERSION"] = instance.Instance.Internal["version"].(string)
+		env["AGENT_IMAGE"] = instance.Instance.Internal["agent_image"].(string)
+
+		prefix := fmt.Sprintf("%s-%s", instance.Instance.Internal["version"].(string), batch.ID)
+
+		// run the actual tests on the host
+		result, runErr := batch.OS.Runner.Run(ctx, r.cfg.VerboseMode, nil, logger, r.cfg.AgentVersion, prefix, batch.Batch, env)
+		if runErr != nil {
+			// report the error from this run, not a stale value from a previous iteration
+			logger.Logf("Failed to execute tests on instance: %s", runErr)
+			err = fmt.Errorf("failed to execute tests on instance %s: %w", instance.Name, runErr)
+		}
+		resultsMx.Lock()
+		results[batch.ID] = result
+		resultsMx.Unlock()
+	}
+	if err != nil {
+		return nil, err
+	}
+	return results, nil
+}
+
+// runInstances runs the batch on each instance in parallel.
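+//
+// Each instance gets its own errgroup goroutine; results are collected into
+// a map keyed by batch ID behind a mutex, and the first failure cancels the
+// shared context for the remaining instances.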
+func (r *Runner) runInstances(ctx context.Context, sshAuth ssh.AuthMethod, repoArchive string, instances []StateInstance) (map[string]common.OSRunnerResult, error) {
+	g, ctx := errgroup.WithContext(ctx)
+	results := make(map[string]common.OSRunnerResult)
+	var resultsMx sync.Mutex
+	for _, i := range instances {
+		func(i StateInstance) {
+			g.Go(func() error {
+				batch, ok := findBatchByID(i.ID, r.batches)
+				if !ok {
+					return fmt.Errorf("unable to find batch with ID: %s", i.ID)
+				}
+				logger := &batchLogger{wrapped: r.logger, prefix: i.ID}
+				result, err := r.runInstance(ctx, sshAuth, logger, repoArchive, batch, i)
+				if err != nil {
+					logger.Logf("Failed for instance %s (@ %s): %s\n", i.ID, i.IP, err)
+					return err
+				}
+				resultsMx.Lock()
+				results[batch.ID] = result
+				resultsMx.Unlock()
+				return nil
+			})
+		}(i)
+	}
+	err := g.Wait()
+	if err != nil {
+		return nil, err
+	}
+	return results, nil
+}
+
+// runInstance runs the batch on the machine.
+func (r *Runner) runInstance(ctx context.Context, sshAuth ssh.AuthMethod, logger common.Logger, repoArchive string, batch common.OSBatch, instance StateInstance) (common.OSRunnerResult, error) {
+	sshPrivateKeyPath, err := filepath.Abs(filepath.Join(r.cfg.StateDir, "id_rsa"))
+	if err != nil {
+		return common.OSRunnerResult{}, fmt.Errorf("failed to determine OGC SSH private key path: %w", err)
+	}
+
+	logger.Logf("Starting SSH; connect with `ssh -i %s %s@%s`", sshPrivateKeyPath, instance.Username, instance.IP)
+	client := tssh.NewClient(instance.IP, instance.Username, sshAuth, logger)
+	connectCtx, connectCancel := context.WithTimeout(ctx, 10*time.Minute)
+	defer connectCancel()
+	err = client.Connect(connectCtx)
+	if err != nil {
+		logger.Logf("Failed to connect to instance %s: %s", instance.IP, err)
+		return common.OSRunnerResult{}, fmt.Errorf("failed to connect to instance %s: %w", instance.Name, err)
+	}
+	defer client.Close()
+	logger.Logf("Connected over SSH")
+
+	if !instance.Prepared {
+		// prepare the host to run the tests
+		logger.Logf("Preparing instance")
+		err = batch.OS.Runner.Prepare(ctx, client, logger, batch.OS.Arch, r.cfg.GOVersion)
+		if err != nil {
+			logger.Logf("Failed to prepare instance: %s", err)
+			return common.OSRunnerResult{}, fmt.Errorf("failed to prepare instance %s: %w", instance.Name, err)
+		}
+
+		// now it's prepared, add to state
+		instance.Prepared = true
+		err = r.addOrUpdateInstance(instance)
+		if err != nil {
+			return common.OSRunnerResult{}, fmt.Errorf("failed to save instance state %s: %w", instance.Name, err)
+		}
+	}
+
+	// copy the required files (done every run)
+	err = batch.OS.Runner.Copy(ctx, client, logger, repoArchive, r.getBuilds(batch))
+	if err != nil {
+		logger.Logf("Failed to copy files to instance: %s", err)
+		return common.OSRunnerResult{}, fmt.Errorf("failed to copy files to instance %s: %w", instance.Name, err)
+	}
+	// start with the ExtraEnv first, preventing the other environment flags below
+	// from being overwritten
+	env := map[string]string{}
+	for k, v := range r.cfg.ExtraEnv {
+		env[k] = v
+	}
+
+	// ensure that we have all the requirements for the stack if required
+	if batch.Batch.Stack != nil {
+		// wait for the stack to be ready before continuing
+		logger.Logf("Waiting for stack to be ready...")
+		stack, err := r.getStackForBatchID(batch.ID)
+		if err != nil {
+			return common.OSRunnerResult{}, err
+		}
+		env["ELASTICSEARCH_HOST"] = stack.Elasticsearch
+		env["ELASTICSEARCH_USERNAME"] = stack.Username
+		env["ELASTICSEARCH_PASSWORD"] = stack.Password
+		env["KIBANA_HOST"] = stack.Kibana
+		env["KIBANA_USERNAME"] = stack.Username
+		env["KIBANA_PASSWORD"] = stack.Password
+		logger.Logf("Using Stack with Kibana host %s, credentials available under .integration-cache", stack.Kibana)
+	}
+
+	// set the go test flags
+	env["GOTEST_FLAGS"] = r.cfg.TestFlags
+	env["TEST_BINARY_NAME"] = r.cfg.BinaryName
+
+	// run the actual tests on the host
+	result, err := batch.OS.Runner.Run(ctx, r.cfg.VerboseMode, client, logger, r.cfg.AgentVersion, batch.ID, batch.Batch, env)
+	if err != nil {
+		logger.Logf("Failed to execute tests on instance: %s", err)
+		return common.OSRunnerResult{}, fmt.Errorf("failed to execute tests on instance %s: %w", instance.Name, err)
+	}
+
+	// fetch any diagnostics
+	if r.cfg.DiagnosticsDir != "" {
+		err = batch.OS.Runner.Diagnostics(ctx, client, logger, r.cfg.DiagnosticsDir)
+		if err != nil {
+			logger.Logf("Failed to fetch diagnostics: %s", err)
+		}
+	} else {
+		logger.Logf("Skipping diagnostics fetch as DiagnosticsDir was not set")
+	}
+
+	return result, nil
+}
+
+// validate ensures that required builds of Elastic Agent exist
+func (r *Runner) validate() error {
+	var requiredFiles []string
+	for _, b := range r.batches {
+		if !b.Skip {
+			for _, build := range r.getBuilds(b) {
+				if !slices.Contains(requiredFiles, build.Path) {
+					requiredFiles = append(requiredFiles, build.Path)
+				}
+				if !slices.Contains(requiredFiles, build.SHA512Path) {
+					requiredFiles = append(requiredFiles, build.SHA512Path)
+				}
+			}
+		}
+	}
+	var missingFiles []string
+	for _, file := range requiredFiles {
+		_, err := os.Stat(file)
+		if os.IsNotExist(err) {
+			missingFiles = append(missingFiles, file)
+		} else if err != nil {
+			return err
+		}
+	}
+	if len(missingFiles) > 0 {
+		return fmt.Errorf("missing required Elastic Agent package builds for integration runner to execute: %s", strings.Join(missingFiles, ", "))
+	}
+	return nil
+}
+
+// getBuilds returns the builds for the batch.
+func (r *Runner) getBuilds(b common.OSBatch) []common.Build {
+	var builds []common.Build
+	formats := []string{"targz", "zip", "rpm", "deb"}
+	binaryName := "elastic-agent"
+
+	var packages []string
+	for _, p := range r.cfg.Packages {
+		if slices.Contains(formats, p) {
+			packages = append(packages, p)
+		}
+	}
+	if len(packages) == 0 {
+		packages = formats
+	}
+
+	// This is for testing beats in a serverless environment
+	if strings.HasSuffix(r.cfg.BinaryName, "beat") {
+		var serverlessPackages []string
+		for _, p := range packages {
+			if slices.Contains([]string{"targz", "zip"}, p) {
+				serverlessPackages = append(serverlessPackages, p)
+			}
+		}
+		packages = serverlessPackages
+	}
+
+	if r.cfg.BinaryName != "" {
+		binaryName = r.cfg.BinaryName
+	}
+
+	for _, f := range packages {
+		arch := b.OS.Arch
+		if arch == define.AMD64 {
+			arch = "x86_64"
+		}
+		suffix, err := testing.GetPackageSuffix(b.OS.Type, b.OS.Arch, f)
+		if err != nil {
+			// Means that OS type & Arch doesn't support that package format
+			continue
+		}
+		packageName := filepath.Join(r.cfg.BuildDir, fmt.Sprintf("%s-%s-%s", binaryName, r.cfg.AgentVersion, suffix))
+		build := common.Build{
+			Version:    r.cfg.ReleaseVersion,
+			Type:       b.OS.Type,
+			Arch:       arch,
+			Path:       packageName,
+			SHA512Path: packageName + ".sha512",
+		}
+
+		builds = append(builds, build)
+	}
+	return builds
+}
+
+// prepare prepares for the runner to run.
+//
+// Creates the SSH keys to use, creates the archive of the repo and pulls the latest container for OGC.
+func (r *Runner) prepare(ctx context.Context) (ssh.AuthMethod, string, error) { + wd, err := WorkDir() + if err != nil { + return nil, "", err + } + cacheDir := filepath.Join(wd, r.cfg.StateDir) + _, err = os.Stat(cacheDir) + if errors.Is(err, os.ErrNotExist) { + err = os.Mkdir(cacheDir, 0755) + if err != nil { + return nil, "", fmt.Errorf("failed to create %q: %w", cacheDir, err) + } + } else if err != nil { + // unknown error + return nil, "", err + } + + var auth ssh.AuthMethod + var repoArchive string + g, gCtx := errgroup.WithContext(ctx) + g.Go(func() error { + a, err := r.createSSHKey(cacheDir) + if err != nil { + return err + } + auth = a + return nil + }) + g.Go(func() error { + repo, err := r.createRepoArchive(gCtx, r.cfg.RepoDir, cacheDir) + if err != nil { + return err + } + repoArchive = repo + return nil + }) + err = g.Wait() + if err != nil { + return nil, "", err + } + return auth, repoArchive, err +} + +// createSSHKey creates the required SSH keys +func (r *Runner) createSSHKey(dir string) (ssh.AuthMethod, error) { + privateKey := filepath.Join(dir, "id_rsa") + _, priErr := os.Stat(privateKey) + publicKey := filepath.Join(dir, "id_rsa.pub") + _, pubErr := os.Stat(publicKey) + var signer ssh.Signer + if errors.Is(priErr, os.ErrNotExist) || errors.Is(pubErr, os.ErrNotExist) { + // either is missing (re-create) + r.logger.Logf("Create SSH keys to use for SSH") + _ = os.Remove(privateKey) + _ = os.Remove(publicKey) + pri, err := tssh.NewPrivateKey() + if err != nil { + return nil, fmt.Errorf("failed to create ssh private key: %w", err) + } + pubBytes, err := tssh.NewPublicKey(&pri.PublicKey) + if err != nil { + return nil, fmt.Errorf("failed to create ssh public key: %w", err) + } + priBytes := tssh.EncodeToPEM(pri) + err = os.WriteFile(privateKey, priBytes, 0600) + if err != nil { + return nil, fmt.Errorf("failed to write ssh private key: %w", err) + } + err = os.WriteFile(publicKey, pubBytes, 0644) + if err != nil { + return nil, fmt.Errorf("failed to write ssh public key: %w", err) + } + signer, err = ssh.ParsePrivateKey(priBytes) + if err != nil { + return nil, fmt.Errorf("failed to parse ssh private key: %w", err) + } + } else if priErr != nil { + // unknown error + return nil, priErr + } else if pubErr != nil { + // unknown error + return nil, pubErr + } else { + // read from existing private key + priBytes, err := os.ReadFile(privateKey) + if err != nil { + return nil, fmt.Errorf("failed to read ssh private key %s: %w", privateKey, err) + } + signer, err = ssh.ParsePrivateKey(priBytes) + if err != nil { + return nil, fmt.Errorf("failed to parse ssh private key: %w", err) + } + } + return ssh.PublicKeys(signer), nil +} + +func (r *Runner) createRepoArchive(ctx context.Context, repoDir string, dir string) (string, error) { + zipPath := filepath.Join(dir, "agent-repo.zip") + _ = os.Remove(zipPath) // start fresh + r.logger.Logf("Creating zip archive of repo to send to remote hosts") + err := createRepoZipArchive(ctx, repoDir, zipPath) + if err != nil { + return "", fmt.Errorf("failed to create zip archive of repo: %w", err) + } + return zipPath, nil +} + +// startStacks starts the stacks required for the tests to run +func (r *Runner) startStacks(ctx context.Context) error { + var versions []string + batchToVersion := make(map[string]string) + for _, lb := range r.batches { + if !lb.Skip && lb.Batch.Stack != nil { + if lb.Batch.Stack.Version == "" { + // no version defined on the stack; set it to the defined stack version + lb.Batch.Stack.Version = r.cfg.StackVersion + } 
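+			// track each unique stack version and remember which version
+			// every batch is waiting on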
+			if !slices.Contains(versions, lb.Batch.Stack.Version) {
+				versions = append(versions, lb.Batch.Stack.Version)
+			}
+			batchToVersion[lb.ID] = lb.Batch.Stack.Version
+		}
+	}
+
+	var requests []stackReq
+	for _, version := range versions {
+		id := strings.Replace(version, ".", "", -1)
+		requests = append(requests, stackReq{
+			request: common.StackRequest{ID: id, Version: version},
+			stack:   r.findStack(id),
+		})
+	}
+
+	reportResult := func(version string, stack common.Stack, err error) {
+		r.batchToStackMx.Lock()
+		defer r.batchToStackMx.Unlock()
+		res := stackRes{
+			stack: stack,
+			err:   err,
+		}
+		for batchID, batchVersion := range batchToVersion {
+			if batchVersion == version {
+				r.batchToStack[batchID] = res
+				ch, ok := r.batchToStackCh[batchID]
+				if ok {
+					ch <- res
+				}
+			}
+		}
+	}
+
+	// start goroutines to provision the needed stacks
+	for _, request := range requests {
+		go func(ctx context.Context, req stackReq) {
+			var err error
+			var stack common.Stack
+			if req.stack != nil {
+				stack = *req.stack
+			} else {
+				stack, err = r.sp.Create(ctx, req.request)
+				if err != nil {
+					reportResult(req.request.Version, stack, err)
+					return
+				}
+				err = r.addOrUpdateStack(stack)
+				if err != nil {
+					reportResult(stack.Version, stack, err)
+					return
+				}
+			}
+
+			if stack.Ready {
+				reportResult(stack.Version, stack, nil)
+				return
+			}
+
+			stack, err = r.sp.WaitForReady(ctx, stack)
+			if err != nil {
+				reportResult(stack.Version, stack, err)
+				return
+			}
+
+			err = r.addOrUpdateStack(stack)
+			if err != nil {
+				reportResult(stack.Version, stack, err)
+				return
+			}
+
+			reportResult(stack.Version, stack, nil)
+		}(ctx, request)
+	}
+
+	return nil
+}
+
+func (r *Runner) getStackForBatchID(id string) (common.Stack, error) {
+	r.batchToStackMx.Lock()
+	res, ok := r.batchToStack[id]
+	if ok {
+		r.batchToStackMx.Unlock()
+		return res.stack, res.err
+	}
+	_, ok = r.batchToStackCh[id]
+	if ok {
+		// release the lock before returning on this error path
+		r.batchToStackMx.Unlock()
+		return common.Stack{}, fmt.Errorf("getStackForBatchID called twice; this is not allowed")
+	}
+	ch := make(chan stackRes, 1)
+	r.batchToStackCh[id] = ch
+	r.batchToStackMx.Unlock()
+
+	// 12 minutes is because the stack should have been ready after 10 minutes or returned an error
+	// this only exists to ensure that if that code is not blocking that this doesn't block forever
+	t := time.NewTimer(12 * time.Minute)
+	defer t.Stop()
+	select {
+	case <-t.C:
+		return common.Stack{}, fmt.Errorf("failed waiting for a response after 12 minutes")
+	case res = <-ch:
+		return res.stack, res.err
+	}
+}
+
+func (r *Runner) findInstance(id string) (StateInstance, bool) {
+	r.stateMx.Lock()
+	defer r.stateMx.Unlock()
+	for _, existing := range r.state.Instances {
+		if existing.Same(StateInstance{
+			Instance: common.Instance{ID: id, Provisioner: r.ip.Name()}}) {
+			return existing, true
+		}
+	}
+	return StateInstance{}, false
+}
+
+func (r *Runner) addOrUpdateInstance(instance StateInstance) error {
+	r.stateMx.Lock()
+	defer r.stateMx.Unlock()
+
+	state := r.state
+	found := false
+	for idx, existing := range state.Instances {
+		if existing.Same(instance) {
+			state.Instances[idx] = instance
+			found = true
+			break
+		}
+	}
+	if !found {
+		state.Instances = append(state.Instances, instance)
+	}
+	r.state = state
+	return r.writeState()
+}
+
+func (r *Runner) findStack(id string) *common.Stack {
+	r.stateMx.Lock()
+	defer r.stateMx.Unlock()
+	for _, existing := range r.state.Stacks {
+		if existing.Same(common.Stack{ID: id, Provisioner: r.sp.Name()}) {
+			return &existing
+		}
+	}
+	return nil
+}
+
+func (r *Runner) addOrUpdateStack(stack
common.Stack) error {
+	r.stateMx.Lock()
+	defer r.stateMx.Unlock()
+
+	state := r.state
+	found := false
+	for idx, existing := range state.Stacks {
+		if existing.Same(stack) {
+			state.Stacks[idx] = stack
+			found = true
+			break
+		}
+	}
+	if !found {
+		state.Stacks = append(state.Stacks, stack)
+	}
+	r.state = state
+	return r.writeState()
+}
+
+func (r *Runner) loadState() error {
+	data, err := os.ReadFile(r.getStatePath())
+	if err != nil && !errors.Is(err, os.ErrNotExist) {
+		return fmt.Errorf("failed to read state file %s: %w", r.getStatePath(), err)
+	}
+	var state State
+	err = yaml.Unmarshal(data, &state)
+	if err != nil {
+		return fmt.Errorf("failed to unmarshal state file %s: %w", r.getStatePath(), err)
+	}
+	r.state = state
+	return nil
+}
+
+func (r *Runner) writeState() error {
+	data, err := yaml.Marshal(&r.state)
+	if err != nil {
+		return fmt.Errorf("failed to marshal state: %w", err)
+	}
+	err = os.WriteFile(r.getStatePath(), data, 0644)
+	if err != nil {
+		return fmt.Errorf("failed to write state file %s: %w", r.getStatePath(), err)
+	}
+	return nil
+}
+
+func (r *Runner) getStatePath() string {
+	return filepath.Join(r.cfg.StateDir, "state.yml")
+}
+
+func (r *Runner) mergeResults(results map[string]common.OSRunnerResult) (Result, error) {
+	var rawOutput bytes.Buffer
+	var jsonOutput bytes.Buffer
+	var suites JUnitTestSuites
+	for id, res := range results {
+		for _, pkg := range res.Packages {
+			err := mergePackageResult(pkg, id, false, &rawOutput, &jsonOutput, &suites)
+			if err != nil {
+				return Result{}, err
+			}
+		}
+		for _, pkg := range res.SudoPackages {
+			err := mergePackageResult(pkg, id, true, &rawOutput, &jsonOutput, &suites)
+			if err != nil {
+				return Result{}, err
+			}
+		}
+	}
+	var junitBytes bytes.Buffer
+	err := writeJUnit(&junitBytes, suites)
+	if err != nil {
+		return Result{}, fmt.Errorf("failed to marshal junit: %w", err)
+	}
+
+	var complete Result
+	for _, suite := range suites.Suites {
+		complete.Tests += suite.Tests
+		complete.Failures += suite.Failures
+	}
+	complete.Output = rawOutput.Bytes()
+	complete.JSONOutput = jsonOutput.Bytes()
+	complete.XMLOutput = junitBytes.Bytes()
+	return complete, nil
+}
+
+// Same returns true if other is the same instance as this one.
+// Two instances are considered the same if their provider and ID are the same.
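+//
+// For example (hypothetical values), two StateInstance entries with
+// provisioner "ogc" and ID "linux-amd64-ubuntu-2404-default" compare as the
+// same even when their IP or Prepared fields differ.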
+func (s StateInstance) Same(other StateInstance) bool { + return s.Provisioner == other.Provisioner && + s.ID == other.ID +} + +func mergePackageResult(pkg common.OSRunnerPackageResult, batchName string, sudo bool, rawOutput io.Writer, jsonOutput io.Writer, suites *JUnitTestSuites) error { + suffix := "" + sudoStr := "false" + if sudo { + suffix = "(sudo)" + sudoStr = "true" + } + if pkg.Output != nil { + rawLogger := &runnerLogger{writer: rawOutput, timestamp: false} + pkgWriter := common.NewPrefixOutput(rawLogger, fmt.Sprintf("%s(%s)%s: ", pkg.Name, batchName, suffix)) + _, err := pkgWriter.Write(pkg.Output) + if err != nil { + return fmt.Errorf("failed to write raw output from %s %s: %w", batchName, pkg.Name, err) + } + } + if pkg.JSONOutput != nil { + jsonSuffix, err := suffixJSONResults(pkg.JSONOutput, fmt.Sprintf("(%s)%s", batchName, suffix)) + if err != nil { + return fmt.Errorf("failed to suffix json output from %s %s: %w", batchName, pkg.Name, err) + } + _, err = jsonOutput.Write(jsonSuffix) + if err != nil { + return fmt.Errorf("failed to write json output from %s %s: %w", batchName, pkg.Name, err) + } + } + if pkg.XMLOutput != nil { + pkgSuites, err := parseJUnit(pkg.XMLOutput) + if err != nil { + return fmt.Errorf("failed to parse junit from %s %s: %w", batchName, pkg.Name, err) + } + for _, pkgSuite := range pkgSuites.Suites { + // append the batch information to the suite name + pkgSuite.Name = fmt.Sprintf("%s(%s)%s", pkgSuite.Name, batchName, suffix) + pkgSuite.Properties = append(pkgSuite.Properties, JUnitProperty{ + Name: "batch", + Value: batchName, + }, JUnitProperty{ + Name: "sudo", + Value: sudoStr, + }) + suites.Suites = append(suites.Suites, pkgSuite) + } + } + return nil +} + +func findBatchByID(id string, batches []common.OSBatch) (common.OSBatch, bool) { + for _, batch := range batches { + if batch.ID == id { + return batch, true + } + } + return common.OSBatch{}, false +} + +type runnerLogger struct { + writer io.Writer + timestamp bool +} + +func (l *runnerLogger) Logf(format string, args ...any) { + if l.timestamp { + _, _ = fmt.Fprintf(l.writer, "[%s] >>> %s\n", time.Now().Format(time.StampMilli), fmt.Sprintf(format, args...)) + } else { + _, _ = fmt.Fprintf(l.writer, ">>> %s\n", fmt.Sprintf(format, args...)) + } +} + +type batchLogger struct { + wrapped common.Logger + prefix string +} + +func filterSupportedOS(batches []common.OSBatch, provisioner common.InstanceProvisioner) []common.OSBatch { + var filtered []common.OSBatch + for _, batch := range batches { + if ok := provisioner.Supported(batch.OS.OS); ok { + filtered = append(filtered, batch) + } + } + return filtered +} + +func (b *batchLogger) Logf(format string, args ...any) { + b.wrapped.Logf("(%s) %s", b.prefix, fmt.Sprintf(format, args...)) +} + +type stackRes struct { + stack common.Stack + err error +} + +type stackReq struct { + request common.StackRequest + stack *common.Stack +} diff --git a/dev-tools/mage/target/srvrlesstest/testing/runner/utils.go b/dev-tools/mage/target/srvrlesstest/testing/runner/utils.go new file mode 100644 index 000000000000..26a1c0bd7bf7 --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/testing/runner/utils.go @@ -0,0 +1,54 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. 
licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package runner
+
+import (
+	"fmt"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/core/process"
+	"io"
+	"os"
+	"os/exec"
+	"path/filepath"
+)
+
+// WorkDir returns the current absolute working directory.
+func WorkDir() (string, error) {
+	wd, err := os.Getwd()
+	if err != nil {
+		return "", fmt.Errorf("failed to get work directory: %w", err)
+	}
+	wd, err = filepath.Abs(wd)
+	if err != nil {
+		return "", fmt.Errorf("failed to get absolute path to work directory: %w", err)
+	}
+	return wd, nil
+}
+
+// AttachOut returns a process.CmdOption that attaches the command's stdout to w.
+func AttachOut(w io.Writer) process.CmdOption {
+	return func(c *exec.Cmd) error {
+		c.Stdout = w
+		return nil
+	}
+}
+
+// AttachErr returns a process.CmdOption that attaches the command's stderr to w.
+func AttachErr(w io.Writer) process.CmdOption {
	return func(c *exec.Cmd) error {
+		c.Stderr = w
+		return nil
+	}
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/ssh/client.go b/dev-tools/mage/target/srvrlesstest/testing/ssh/client.go
new file mode 100644
index 000000000000..fac1ad051953
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/ssh/client.go
@@ -0,0 +1,301 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package ssh
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"net"
+	"os"
+	"strings"
+	"time"
+
+	"golang.org/x/crypto/ssh"
+)
+
+type logger interface {
+	// Logf logs the message for this runner.
+	Logf(format string, args ...any)
+}
+
+type sshClient struct {
+	ip       string
+	username string
+	auth     ssh.AuthMethod
+	logger   logger
+	c        *ssh.Client
+}
+
+// NewClient creates a new SSH client for the host; Connect must be called
+// before the connection can be used.
+func NewClient(ip string, username string, sshAuth ssh.AuthMethod, logger logger) SSHClient {
+	return &sshClient{
+		ip:       ip,
+		username: username,
+		auth:     sshAuth,
+		logger:   logger,
+	}
+}
+
+// Connect connects to the host.
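+//
+// It retries in a loop, doubling the delay between attempts (starting at one
+// second), until the TCP dial, keep-alive setup, and SSH handshake all
+// succeed or ctx is cancelled; on cancellation the last connection error is
+// returned when one was observed.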
+func (s *sshClient) Connect(ctx context.Context) error {
+	var lastErr error
+	config := &ssh.ClientConfig{
+		User:            s.username,
+		HostKeyCallback: ssh.InsecureIgnoreHostKey(), //nolint:gosec // it's the tests framework test
+		Auth:            []ssh.AuthMethod{s.auth},
+		Timeout:         30 * time.Second,
+	}
+	addr := net.JoinHostPort(s.ip, "22")
+
+	tcpAddr, err := net.ResolveTCPAddr("tcp", addr)
+	if err != nil {
+		return fmt.Errorf("unable to resolve ssh address %q :%w", addr, err)
+	}
+	delay := 1 * time.Second
+	for {
+		if ctx.Err() != nil {
+			if lastErr == nil {
+				return ctx.Err()
+			}
+			return lastErr
+		}
+		if lastErr != nil {
+			s.logger.Logf("ssh connect error: %q, will try again in %s", lastErr, delay)
+			time.Sleep(delay)
+			delay = 2 * delay
+		}
+		conn, err := net.DialTCP("tcp", nil, tcpAddr)
+		if err != nil {
+			lastErr = fmt.Errorf("error dialing tcp address %q :%w", addr, err)
+			continue
+		}
+		err = conn.SetKeepAlive(true)
+		if err != nil {
+			_ = conn.Close()
+			lastErr = fmt.Errorf("error setting TCP keepalive for ssh to %q :%w", addr, err)
+			continue
+		}
+		err = conn.SetKeepAlivePeriod(config.Timeout)
+		if err != nil {
+			_ = conn.Close()
+			lastErr = fmt.Errorf("error setting TCP keepalive period for ssh to %q :%w", addr, err)
+			continue
+		}
+		sshConn, chans, reqs, err := ssh.NewClientConn(conn, addr, config)
+		if err != nil {
+			_ = conn.Close()
+			lastErr = fmt.Errorf("error NewClientConn for ssh to %q :%w", addr, err)
+			continue
+		}
+		s.c = ssh.NewClient(sshConn, chans, reqs)
+		return nil
+	}
+}
+
+// ConnectWithTimeout connects to the host with a timeout.
+func (s *sshClient) ConnectWithTimeout(ctx context.Context, timeout time.Duration) error {
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+	return s.Connect(ctx)
+}
+
+// Close closes the client.
+func (s *sshClient) Close() error {
+	if s.c != nil {
+		err := s.c.Close()
+		s.c = nil
+		return err
+	}
+	return nil
+}
+
+// Reconnect disconnects and reconnects to the host.
+func (s *sshClient) Reconnect(ctx context.Context) error {
+	_ = s.Close()
+	return s.Connect(ctx)
+}
+
+// ReconnectWithTimeout disconnects and reconnects to the host with a timeout.
+func (s *sshClient) ReconnectWithTimeout(ctx context.Context, timeout time.Duration) error {
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+	return s.Reconnect(ctx)
+}
+
+// NewSession opens a new Session for this host.
+func (s *sshClient) NewSession() (*ssh.Session, error) {
+	return s.c.NewSession()
+}
+
+// Exec runs a command on the host.
+func (s *sshClient) Exec(ctx context.Context, cmd string, args []string, stdin io.Reader) ([]byte, []byte, error) {
+	if ctx.Err() != nil {
+		return nil, nil, ctx.Err()
+	}
+
+	var session *ssh.Session
+	cmdArgs := []string{cmd}
+	cmdArgs = append(cmdArgs, args...)
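+	// the command and its arguments are joined with spaces and run through
+	// the remote shell as-is; nothing is quoted here, so any argument that
+	// contains whitespace must be pre-quoted by the caller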
+	cmdStr := strings.Join(cmdArgs, " ")
+	session, err := s.NewSession()
+	if err != nil {
+		s.logger.Logf("new session failed: %q, trying reconnect", err)
+		lErr := s.Reconnect(ctx)
+		if lErr != nil {
+			return nil, nil, fmt.Errorf("ssh reconnect failed: %w, after new session failed: %w", lErr, err)
+		}
+		session, lErr = s.NewSession()
+		if lErr != nil {
+			return nil, nil, fmt.Errorf("new session failed after reconnect: %w, original new session failure was: %w", lErr, err)
+		}
+	}
+	defer session.Close()
+
+	var stdout bytes.Buffer
+	var stderr bytes.Buffer
+	session.Stdout = &stdout
+	session.Stderr = &stderr
+	if stdin != nil {
+		session.Stdin = stdin
+	}
+	err = session.Run(cmdStr)
+	if err != nil {
+		return stdout.Bytes(), stderr.Bytes(), fmt.Errorf("could not run %q through SSH: %w",
+			cmdStr, err)
+	}
+	return stdout.Bytes(), stderr.Bytes(), err
+}
+
+// ExecWithRetry runs the command in a loop, waiting the interval between calls.
+func (s *sshClient) ExecWithRetry(ctx context.Context, cmd string, args []string, interval time.Duration) ([]byte, []byte, error) {
+	var lastErr error
+	var lastStdout []byte
+	var lastStderr []byte
+	for {
+		// the length of time for running the command is not blocked on the interval
+		// don't create a new context with the interval as its timeout
+		stdout, stderr, err := s.Exec(ctx, cmd, args, nil)
+		if err == nil {
+			return stdout, stderr, nil
+		}
+		s.logger.Logf("ssh exec error: %q, will try again in %s", err, interval)
+		lastErr = err
+		lastStdout = stdout
+		lastStderr = stderr
+
+		// wait for the interval or ctx to be cancelled
+		select {
+		case <-ctx.Done():
+			if lastErr != nil {
+				return lastStdout, lastStderr, lastErr
+			}
+			return nil, nil, ctx.Err()
+		case <-time.After(interval):
+		}
+	}
+}
+
+// Copy copies the filePath to the host at dest.
+func (s *sshClient) Copy(filePath string, dest string) error {
+	f, err := os.Open(filePath)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+	fs, err := f.Stat()
+	if err != nil {
+		return err
+	}
+
+	session, err := s.NewSession()
+	if err != nil {
+		return err
+	}
+	defer session.Close()
+
+	w, err := session.StdinPipe()
+	if err != nil {
+		return err
+	}
+
+	cmd := fmt.Sprintf("scp -t %s", dest)
+	if err := session.Start(cmd); err != nil {
+		_ = w.Close()
+		return err
+	}
+
+	errCh := make(chan error)
+	go func() {
+		errCh <- session.Wait()
+	}()
+
+	_, err = fmt.Fprintf(w, "C%#o %d %s\n", fs.Mode().Perm(), fs.Size(), dest)
+	if err != nil {
+		_ = w.Close()
+		<-errCh
+		return err
+	}
+	_, err = io.Copy(w, f)
+	if err != nil {
+		_ = w.Close()
+		<-errCh
+		return err
+	}
+	_, _ = fmt.Fprint(w, "\x00")
+	_ = w.Close()
+	return <-errCh
+}
+
+// GetFileContents returns the file content.
+func (s *sshClient) GetFileContents(ctx context.Context, filename string, opts ...FileContentsOpt) ([]byte, error) {
+	var stdout bytes.Buffer
+	err := s.GetFileContentsOutput(ctx, filename, &stdout, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return stdout.Bytes(), nil
+}
+
+// GetFileContentsOutput returns the file content, writing it into output.
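+//
+// The fetch command defaults to cat and can be overridden per call; a
+// hypothetical example against a Windows host:
+//
+//	contents, err := client.GetFileContents(ctx, `C:\agent\build.sha512`,
+//		WithContentFetchCommand("type"))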
+func (s *sshClient) GetFileContentsOutput(ctx context.Context, filename string, output io.Writer, opts ...FileContentsOpt) error { + if ctx.Err() != nil { + return ctx.Err() + } + + var fco fileContentsOpts + fco.command = "cat" + for _, opt := range opts { + opt(&fco) + } + + session, err := s.NewSession() + if err != nil { + return err + } + defer session.Close() + + session.Stdout = output + err = session.Run(fmt.Sprintf("%s %s", fco.command, filename)) + if err != nil { + return err + } + return nil +} diff --git a/dev-tools/mage/target/srvrlesstest/testing/ssh/file.go b/dev-tools/mage/target/srvrlesstest/testing/ssh/file.go new file mode 100644 index 000000000000..1fd6ffd9e333 --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/testing/ssh/file.go @@ -0,0 +1,32 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package ssh + +type fileContentsOpts struct { + command string +} + +// FileContentsOpt provides an option to modify how fetching files from the remote host work. +type FileContentsOpt func(opts *fileContentsOpts) + +// WithContentFetchCommand changes the command to use for fetching the file contents. +func WithContentFetchCommand(command string) FileContentsOpt { + return func(opts *fileContentsOpts) { + opts.command = command + } +} diff --git a/dev-tools/mage/target/srvrlesstest/testing/ssh/interface.go b/dev-tools/mage/target/srvrlesstest/testing/ssh/interface.go new file mode 100644 index 000000000000..487f19a5c06c --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/testing/ssh/interface.go @@ -0,0 +1,62 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package ssh + +import ( + "context" + "io" + "time" + + "golang.org/x/crypto/ssh" +) + +// SSHClient is a *ssh.Client that provides a nice interface to work with. +type SSHClient interface { + // Connect connects to the host. + Connect(ctx context.Context) error + + // ConnectWithTimeout connects to the host with a timeout. + ConnectWithTimeout(ctx context.Context, timeout time.Duration) error + + // Close closes the client. 
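+	// For the client returned by NewClient, calling Close more than once is
+	// safe; a second call is a no-op.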
+	Close() error
+
+	// Reconnect disconnects and reconnects to the host.
+	Reconnect(ctx context.Context) error
+
+	// ReconnectWithTimeout disconnects and reconnects to the host with a timeout.
+	ReconnectWithTimeout(ctx context.Context, timeout time.Duration) error
+
+	// NewSession opens a new Session for this host.
+	NewSession() (*ssh.Session, error)
+
+	// Exec runs a command on the host.
+	Exec(ctx context.Context, cmd string, args []string, stdin io.Reader) ([]byte, []byte, error)
+
+	// ExecWithRetry runs the command in a loop, waiting the interval between calls.
+	ExecWithRetry(ctx context.Context, cmd string, args []string, interval time.Duration) ([]byte, []byte, error)
+
+	// Copy copies the filePath to the host at dest.
+	Copy(filePath string, dest string) error
+
+	// GetFileContents returns the file content.
+	GetFileContents(ctx context.Context, filename string, opts ...FileContentsOpt) ([]byte, error)
+
+	// GetFileContentsOutput returns the file content, writing it to output.
+	GetFileContentsOutput(ctx context.Context, filename string, output io.Writer, opts ...FileContentsOpt) error
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/ssh/keys.go b/dev-tools/mage/target/srvrlesstest/testing/ssh/keys.go
new file mode 100644
index 000000000000..6d40a31fea18
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/ssh/keys.go
@@ -0,0 +1,60 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package ssh
+
+import (
+	"crypto/rand"
+	"crypto/rsa"
+	"crypto/x509"
+	"encoding/pem"
+
+	"golang.org/x/crypto/ssh"
+)
+
+// NewPrivateKey creates an RSA private key.
+func NewPrivateKey() (*rsa.PrivateKey, error) {
+	pk, err := rsa.GenerateKey(rand.Reader, 2056)
+	if err != nil {
+		return nil, err
+	}
+	err = pk.Validate()
+	if err != nil {
+		return nil, err
+	}
+	return pk, nil
+}
+
+// EncodeToPEM encodes the private key to PEM format.
+func EncodeToPEM(privateKey *rsa.PrivateKey) []byte {
+	der := x509.MarshalPKCS1PrivateKey(privateKey)
+	privBlock := pem.Block{
+		Type:    "RSA PRIVATE KEY",
+		Headers: nil,
+		Bytes:   der,
+	}
+	return pem.EncodeToMemory(&privBlock)
+}
+
+// NewPublicKey returns the bytes for writing to a .pub file.
+func NewPublicKey(pk *rsa.PublicKey) ([]byte, error) {
+	pub, err := ssh.NewPublicKey(pk)
+	if err != nil {
+		return nil, err
+	}
+	return ssh.MarshalAuthorizedKey(pub), nil
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/supported/batch.go b/dev-tools/mage/target/srvrlesstest/testing/supported/batch.go
new file mode 100644
index 000000000000..5138bbaf225d
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/supported/batch.go
@@ -0,0 +1,195 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements.
See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package supported + +import ( + "crypto/sha512" + "errors" + "fmt" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/common" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define" + "strings" + "unicode/utf8" +) + +// CreateBatches creates the OSBatch set based on the defined supported OS's. +func CreateBatches(batches []define.Batch, platforms []define.OS, groups []string, matrix bool, singleTest string) ([]common.OSBatch, error) { + var err error + var osBatches []common.OSBatch + for _, b := range batches { + lbs, err := createBatchesFromBatch(b, platforms, groups, matrix) + if err != nil { + return nil, err + } + if lbs != nil { + osBatches = append(osBatches, lbs...) + } + } + if singleTest != "" { + osBatches, err = filterSingleTest(osBatches, singleTest) + if err != nil { + return nil, err + } + } + + return osBatches, nil +} + +func createBatchesFromBatch(batch define.Batch, platforms []define.OS, groups []string, matrix bool) ([]common.OSBatch, error) { + var batches []common.OSBatch + if len(groups) > 0 && !batchInGroups(batch, groups) { + return nil, nil + } + specifics, err := getSupported(batch.OS, platforms) + if errors.Is(err, ErrOSNotSupported) { + var s common.SupportedOS + s.OS.Type = batch.OS.Type + s.OS.Arch = batch.OS.Arch + s.OS.Distro = batch.OS.Distro + if s.OS.Distro == "" { + s.OS.Distro = "unknown" + } + if s.OS.Version == "" { + s.OS.Version = "unknown" + } + b := common.OSBatch{ + OS: s, + Batch: batch, + Skip: true, + } + b.ID = createBatchID(b) + batches = append(batches, b) + return batches, nil + } else if err != nil { + return nil, err + } + if matrix { + for _, s := range specifics { + b := common.OSBatch{ + OS: s, + Batch: batch, + Skip: false, + } + b.ID = createBatchID(b) + batches = append(batches, b) + } + } else { + b := common.OSBatch{ + OS: specifics[0], + Batch: batch, + Skip: false, + } + b.ID = createBatchID(b) + batches = append(batches, b) + } + return batches, nil +} + +func batchInGroups(batch define.Batch, groups []string) bool { + for _, g := range groups { + if batch.Group == g { + return true + } + } + return false +} + +func filterSingleTest(batches []common.OSBatch, singleTest string) ([]common.OSBatch, error) { + var filtered []common.OSBatch + for _, batch := range batches { + batch, ok := filterSingleTestBatch(batch, singleTest) + if ok { + filtered = append(filtered, batch) + } + } + if len(filtered) == 0 { + return nil, fmt.Errorf("test not found: %s", singleTest) + } + return filtered, nil +} + +func filterSingleTestBatch(batch common.OSBatch, testName string) (common.OSBatch, bool) { + for _, pt := range batch.Batch.Tests { + for _, t := range pt.Tests { + if t.Name == testName { + // filter batch to only run one test + batch.Batch.Tests = []define.BatchPackageTests{ 
+					{
+						Name:  pt.Name,
+						Tests: []define.BatchPackageTest{t},
+					},
+				}
+				batch.Batch.SudoTests = nil
+				// remove stack requirement when the test doesn't need a stack
+				if !t.Stack {
+					batch.Batch.Stack = nil
+				}
+				return batch, true
+			}
+		}
+	}
+	for _, pt := range batch.Batch.SudoTests {
+		for _, t := range pt.Tests {
+			if t.Name == testName {
+				// filter batch to only run one test
+				batch.Batch.SudoTests = []define.BatchPackageTests{
+					{
+						Name:  pt.Name,
+						Tests: []define.BatchPackageTest{t},
+					},
+				}
+				batch.Batch.Tests = nil
+				// remove stack requirement when the test doesn't need a stack
+				if !t.Stack {
+					batch.Batch.Stack = nil
+				}
+				return batch, true
+			}
+		}
+	}
+	return batch, false
+}
+
+// createBatchID creates a consistent/unique ID for the batch.
+//
+// The ID needs to be consistent so each execution of the runner always
+// selects the same ID for each batch.
+func createBatchID(batch common.OSBatch) string {
+	id := batch.OS.Type + "-" + batch.OS.Arch
+	if batch.OS.Type == define.Linux {
+		id += "-" + batch.OS.Distro
+	}
+	if batch.OS.Version != "" {
+		id += "-" + strings.Replace(batch.OS.Version, ".", "", -1)
+	}
+	if batch.OS.Type == define.Kubernetes && batch.OS.DockerVariant != "" {
+		id += "-" + batch.OS.DockerVariant
+	}
+	id += "-" + strings.Replace(batch.Batch.Group, ".", "", -1)
+
+	// The batchID needs to be at most 63 characters long, otherwise
+	// OGC will fail to instantiate the VM.
+	maxIDLen := 63
+	if len(id) > maxIDLen {
+		// the full hex digest is longer than the limit itself, so keep only a
+		// short prefix; 16 hex characters is enough to keep the truncated ID
+		// unique while leaving room for the rest of the ID
+		hash := fmt.Sprintf("%x", sha512.Sum384([]byte(id)))[:16]
+		hashLen := utf8.RuneCountInString(hash)
+		id = id[:maxIDLen-hashLen-1] + "-" + hash
+	}
+
+	return strings.ToLower(id)
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/supported/supported.go b/dev-tools/mage/target/srvrlesstest/testing/supported/supported.go
new file mode 100644
index 000000000000..c9a0a15eab82
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/supported/supported.go
@@ -0,0 +1,287 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package supported
+
+import (
+	"errors"
+	"fmt"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/common"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/kubernetes"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/linux"
+	"github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/windows"
+)
+
+const (
+	// Rhel is a Linux distro.
+	Rhel = "rhel"
+	// Ubuntu is a Linux distro.
+	Ubuntu = "ubuntu"
+)
+
+var (
+	// ErrOSNotSupported is returned when the OS is not supported.
+	ErrOSNotSupported = errors.New("os/arch not currently supported")
+)
+
+var (
+	// UbuntuAMD64_2404 - Ubuntu (amd64) 24.04
+	UbuntuAMD64_2404 = common.SupportedOS{
+		OS: define.OS{
+			Type:    define.Linux,
+			Arch:    define.AMD64,
+			Distro:  Ubuntu,
+			Version: "24.04",
+		},
+		Runner: linux.DebianRunner{},
+	}
+	// UbuntuAMD64_2204 - Ubuntu (amd64) 22.04
+	UbuntuAMD64_2204 = common.SupportedOS{
+		OS: define.OS{
+			Type:    define.Linux,
+			Arch:    define.AMD64,
+			Distro:  Ubuntu,
+			Version: "22.04",
+		},
+		Runner: linux.DebianRunner{},
+	}
+	// UbuntuAMD64_2004 - Ubuntu (amd64) 20.04
+	UbuntuAMD64_2004 = common.SupportedOS{
+		OS: define.OS{
+			Type:    define.Linux,
+			Arch:    define.AMD64,
+			Distro:  Ubuntu,
+			Version: "20.04",
+		},
+		Runner: linux.DebianRunner{},
+	}
+	// UbuntuARM64_2404 - Ubuntu (arm64) 24.04
+	UbuntuARM64_2404 = common.SupportedOS{
+		OS: define.OS{
+			Type:    define.Linux,
+			Arch:    define.ARM64,
+			Distro:  Ubuntu,
+			Version: "24.04",
+		},
+		Runner: linux.DebianRunner{},
+	}
+	// UbuntuARM64_2204 - Ubuntu (arm64) 22.04
+	UbuntuARM64_2204 = common.SupportedOS{
+		OS: define.OS{
+			Type:    define.Linux,
+			Arch:    define.ARM64,
+			Distro:  Ubuntu,
+			Version: "22.04",
+		},
+		Runner: linux.DebianRunner{},
+	}
+	// UbuntuARM64_2004 - Ubuntu (arm64) 20.04
+	UbuntuARM64_2004 = common.SupportedOS{
+		OS: define.OS{
+			Type:    define.Linux,
+			Arch:    define.ARM64,
+			Distro:  Ubuntu,
+			Version: "20.04",
+		},
+		Runner: linux.DebianRunner{},
+	}
+	// RhelAMD64_8 - RedHat Enterprise Linux (amd64) 8
+	RhelAMD64_8 = common.SupportedOS{
+		OS: define.OS{
+			Type:    define.Linux,
+			Arch:    define.AMD64,
+			Distro:  Rhel,
+			Version: "8",
+		},
+		Runner: linux.RhelRunner{},
+	}
+	// WindowsAMD64_2022 - Windows (amd64) Server 2022
+	WindowsAMD64_2022 = common.SupportedOS{
+		OS: define.OS{
+			Type:    define.Windows,
+			Arch:    define.AMD64,
+			Version: "2022",
+		},
+		Runner: windows.WindowsRunner{},
+	}
+	// WindowsAMD64_2022_Core - Windows (amd64) Server 2022 Core
+	WindowsAMD64_2022_Core = common.SupportedOS{
+		OS: define.OS{
+			Type:    define.Windows,
+			Arch:    define.AMD64,
+			Version: "2022-core",
+		},
+		Runner: windows.WindowsRunner{},
+	}
+	// WindowsAMD64_2019 - Windows (amd64) Server 2019
+	WindowsAMD64_2019 = common.SupportedOS{
+		OS: define.OS{
+			Type:    define.Windows,
+			Arch:    define.AMD64,
+			Version: "2019",
+		},
+		Runner: windows.WindowsRunner{},
+	}
+	// WindowsAMD64_2019_Core - Windows (amd64) Server 2019 Core
+	WindowsAMD64_2019_Core = common.SupportedOS{
+		OS: define.OS{
+			Type:    define.Windows,
+			Arch:    define.AMD64,
+			Version: "2019-core",
+		},
+		Runner: windows.WindowsRunner{},
+	}
+	// WindowsAMD64_2016 - Windows (amd64) Server 2016
+	WindowsAMD64_2016 = common.SupportedOS{
+		OS: define.OS{
+			Type:    define.Windows,
+			Arch:    define.AMD64,
+			Version: "2016",
+		},
+		Runner: windows.WindowsRunner{},
+	}
+	// WindowsAMD64_2016_Core - Windows (amd64) Server 2016 Core
+	WindowsAMD64_2016_Core = common.SupportedOS{
+		OS: define.OS{
+			Type:    define.Windows,
+			Arch:    define.AMD64,
+			Version: "2016-core",
+		},
+		Runner: windows.WindowsRunner{},
+	}
+)
+
+// supported defines the set of supported OSes.
+//
+// A provisioner might support only a subset of these OSes, but the following
+// are known to be supported by our OS runner logic.
+//
+// When a batch is not specific about the version and/or distro, the first
+// matching entry in this list is picked, so the OS we want tested the most
+// should be placed at the top.
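+//
+// For example, a batch that only asks for Linux on amd64, with no distro or
+// version, resolves to UbuntuAMD64_2404, the first matching entry below.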
+var supported = []common.SupportedOS{
+	UbuntuAMD64_2404,
+	UbuntuAMD64_2204,
+	UbuntuAMD64_2004,
+	UbuntuARM64_2404,
+	UbuntuARM64_2204,
+	UbuntuARM64_2004,
+	RhelAMD64_8,
+	WindowsAMD64_2022,
+	WindowsAMD64_2022_Core,
+	WindowsAMD64_2019,
+	WindowsAMD64_2019_Core,
+	// https://github.com/elastic/ingest-dev/issues/3484
+	// WindowsAMD64_2016,
+	// WindowsAMD64_2016_Core,
+}
+
+// init injects the kubernetes support list into the support list above.
+func init() {
+	for _, k8sSupport := range kubernetes.GetSupported() {
+		supported = append(supported, common.SupportedOS{
+			OS:     k8sSupport,
+			Runner: kubernetes.Runner{},
+		})
+	}
+}
+
+// osMatch returns true when the specific OS is a match for a non-specific OS.
+func osMatch(specific define.OS, notSpecific define.OS) bool {
+	if specific.Type != notSpecific.Type || specific.Arch != notSpecific.Arch {
+		return false
+	}
+	if notSpecific.Distro != "" && specific.Distro != notSpecific.Distro {
+		return false
+	}
+	if notSpecific.Version != "" && specific.Version != notSpecific.Version {
+		return false
+	}
+	if notSpecific.DockerVariant != "" && specific.DockerVariant != notSpecific.DockerVariant {
+		return false
+	}
+	return true
+}
+
+// getSupported returns all the supported OSes that match the provided OS
+// profile, using the provided platforms as a filter.
+func getSupported(os define.OS, platforms []define.OS) ([]common.SupportedOS, error) {
+	var match []common.SupportedOS
+	for _, s := range supported {
+		if osMatch(s.OS, os) && allowedByPlatforms(s.OS, platforms) {
+			match = append(match, s)
+		}
+	}
+	if len(match) > 0 {
+		return match, nil
+	}
+	return nil, fmt.Errorf("%w: %s/%s", ErrOSNotSupported, os.Type, os.Arch)
+}
+
+// allowedByPlatforms determines if the os is in the allowed list of platforms.
+func allowedByPlatforms(os define.OS, platforms []define.OS) bool {
+	if len(platforms) == 0 {
+		return true
+	}
+	for _, platform := range platforms {
+		if ok := allowedByPlatform(os, platform); ok {
+			return true
+		}
+	}
+	return false
+}
+
+// allowedByPlatform determines if the platform allows this os.
+func allowedByPlatform(os define.OS, platform define.OS) bool {
+	if os.Type != platform.Type {
+		return false
+	}
+	if platform.Arch == "" {
+		// not specific on arch
+		return true
+	}
+	if os.Arch != platform.Arch {
+		return false
+	}
+	if platform.Type == define.Linux {
+		// on linux, the distro is also matched
+		if platform.Distro == "" {
+			// not specific on distro
+			return true
+		}
+		if os.Distro != platform.Distro {
+			return false
+		}
+	}
+	if platform.Version == "" {
+		// not specific on version
+		return true
+	}
+	if os.Version != platform.Version {
+		return false
+	}
+	if platform.Type == define.Kubernetes {
+		// on kubernetes, the docker variant is also matched
+		if platform.DockerVariant == "" {
+			return true
+		}
+		if os.DockerVariant != platform.DockerVariant {
+			return false
+		}
+	}
+	return true
+}
diff --git a/dev-tools/mage/target/srvrlesstest/testing/windows/windows.go b/dev-tools/mage/target/srvrlesstest/testing/windows/windows.go
new file mode 100644
index 000000000000..b554507f36fd
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/testing/windows/windows.go
@@ -0,0 +1,342 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V.
licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package windows + +import ( + "context" + "fmt" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/common" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/define" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest/testing/ssh" + "os" + "path" + "path/filepath" + "strings" + "time" +) + +// WindowsRunner is a handler for running tests on Windows +type WindowsRunner struct{} + +// Prepare the test +func (WindowsRunner) Prepare(ctx context.Context, sshClient ssh.SSHClient, logger common.Logger, arch string, goVersion string) error { + // install chocolatey + logger.Logf("Installing chocolatey") + chocoInstall := `"[System.Net.ServicePointManager]::SecurityProtocol = 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))"` + updateCtx, updateCancel := context.WithTimeout(ctx, 3*time.Minute) + defer updateCancel() + stdOut, errOut, err := sshRunPowershell(updateCtx, sshClient, chocoInstall) + if err != nil { + return fmt.Errorf("failed to install chocolatey: %w (stdout: %s, stderr: %s)", err, stdOut, errOut) + } + // reconnect to get updated environment variables (1 minute as it should be quick to reconnect) + err = sshClient.ReconnectWithTimeout(ctx, 1*time.Minute) + if err != nil { + return fmt.Errorf("failed to reconnect: %w (stdout: %s, stderr: %s)", err, stdOut, errOut) + } + + // install curl + logger.Logf("Installing curl") + stdOut, errOut, err = sshClient.Exec(ctx, "choco", []string{"install", "-y", "curl"}, nil) + if err != nil { + return fmt.Errorf("failed to install curl: %w (stdout: %s, stderr: %s)", err, stdOut, errOut) + } + // install make + logger.Logf("Installing make") + stdOut, errOut, err = sshClient.Exec(ctx, "choco", []string{"install", "-y", "make"}, nil) + if err != nil { + return fmt.Errorf("failed to install make: %w (stdout: %s, stderr: %s)", err, stdOut, errOut) + } + + // install golang (doesn't use choco, because sometimes it doesn't have the required version) + logger.Logf("Installing golang %s (%s)", goVersion, arch) + downloadURL := fmt.Sprintf("https://go.dev/dl/go%s.windows-%s.msi", goVersion, arch) + filename := path.Base(downloadURL) + stdOut, errOut, err = sshClient.Exec(ctx, "curl", []string{"-Ls", downloadURL, "--output", filename}, nil) + if err != nil { + return fmt.Errorf("failed to download go from %s with curl: %w (stdout: %s, stderr: %s)", downloadURL, err, stdOut, errOut) + } + stdOut, errOut, err = sshClient.Exec(ctx, "msiexec", []string{"/i", filename, "/qn"}, nil) + if err != nil { + return fmt.Errorf("failed to install go: %w (stdout: %s, stderr: %s)", err, stdOut, errOut) + } + // reconnect to get updated environment variables (1 minute as it should be quick to reconnect) + err = sshClient.ReconnectWithTimeout(ctx, 1*time.Minute) + if err != nil { + return fmt.Errorf("failed to reconnect: %w (stdout: %s, stderr: %s)", err, stdOut, errOut) + } 
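+	// at this point chocolatey, curl, make, and the requested Go toolchain
+	// are installed, and the refreshed SSH session sees their PATH changes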
+
+	return nil
+}
+
+// Copy places the required files on the host.
+func (WindowsRunner) Copy(ctx context.Context, sshClient ssh.SSHClient, logger common.Logger, repoArchive string, builds []common.Build) error {
+	// copy the archive and extract it on the host (tar exists and can extract zip on windows)
+	logger.Logf("Copying repo")
+	destRepoName := filepath.Base(repoArchive)
+	err := sshClient.Copy(repoArchive, destRepoName)
+	if err != nil {
+		return fmt.Errorf("failed to SCP repo archive %s: %w", repoArchive, err)
+	}
+
+	// ensure the agent directory is removed (it may already exist if the instance was already used)
+	// Windows errors if the directory doesn't exist; that's fine, so any error here is ignored
+	_, _, _ = sshClient.Exec(ctx, "rmdir", []string{"agent", "/s", "/q"}, nil)
+
+	stdOut, errOut, err := sshClient.Exec(ctx, "mkdir", []string{"agent"}, nil)
+	if err != nil {
+		return fmt.Errorf("failed to mkdir agent: %w (stdout: %s, stderr: %s)", err, stdOut, errOut)
+	}
+	stdOut, errOut, err = sshClient.Exec(ctx, "tar", []string{"-xf", destRepoName, "-C", "agent"}, nil)
+	if err != nil {
+		return fmt.Errorf("failed to unzip %s to agent directory: %w (stdout: %s, stderr: %s)", destRepoName, err, stdOut, errOut)
+	}
+
+	// install mage and prepare for testing
+	logger.Logf("Running make mage and prepareOnRemote")
+	stdOut, errOut, err = sshClient.Exec(ctx, "cd", []string{"agent", "&&", "make", "mage", "&&", "mage", "integration:prepareOnRemote"}, nil)
+	if err != nil {
+		return fmt.Errorf("failed to perform make mage and prepareOnRemote: %w (stdout: %s, stderr: %s)", err, stdOut, errOut)
+	}
+
+	// determine if the build needs to be replaced on the host
+	// if it already exists and the SHA512 contents are the same, then
+	// there is no reason to waste time uploading the build
+	for _, build := range builds {
+		copyBuild := true
+		localSHA512, err := os.ReadFile(build.SHA512Path)
+		if err != nil {
+			return fmt.Errorf("failed to read local SHA512 contents %s: %w", build.SHA512Path, err)
+		}
+		hostSHA512Path := filepath.Base(build.SHA512Path)
+		hostSHA512, err := sshClient.GetFileContents(ctx, hostSHA512Path, ssh.WithContentFetchCommand("type"))
+		if err == nil {
+			if string(localSHA512) == string(hostSHA512) {
+				logger.Logf("Skipping copy agent build %s; already the same", filepath.Base(build.Path))
+				copyBuild = false
+			}
+		}
+
+		if copyBuild {
+			// ensure the existing copies are removed first
+			toRemove := filepath.Base(build.Path)
+			stdOut, errOut, err = sshClient.Exec(ctx,
+				"del", []string{toRemove, "/f", "/q"}, nil)
+			if err != nil {
+				return fmt.Errorf("failed to remove %q: %w (stdout: %q, stderr: %q)",
+					toRemove, err, stdOut, errOut)
+			}
+
+			toRemove = filepath.Base(build.SHA512Path)
+			stdOut, errOut, err = sshClient.Exec(ctx,
+				"del", []string{toRemove, "/f", "/q"}, nil)
+			if err != nil {
+				return fmt.Errorf("failed to remove %q: %w (stdout: %q, stderr: %q)",
+					toRemove, err, stdOut, errOut)
+			}
+
+			logger.Logf("Copying agent build %s", filepath.Base(build.Path))
+		}
+
+		for _, buildPath := range []string{build.Path, build.SHA512Path} {
+			if copyBuild {
+				err = sshClient.Copy(buildPath, filepath.Base(buildPath))
+				if err != nil {
+					return fmt.Errorf("failed to SCP build %s: %w", filepath.Base(buildPath), err)
+				}
+			}
+			insideAgentDir := filepath.Join("agent", buildPath)
+			// the build path may already exist; 'mkdir' on windows fails in that case,
+			// so the error from this call is ignored
+			_, _, _ = sshClient.Exec(ctx, "mkdir", []string{toWindowsPath(filepath.Dir(insideAgentDir))}, nil)
+			stdOut, errOut, err = sshClient.Exec(ctx, "mklink", []string{"/h", toWindowsPath(insideAgentDir), filepath.Base(buildPath)}, nil)
+			if err != nil {
+				return fmt.Errorf("failed to hard link %s to %s: %w (stdout: %s, stderr: %s)", filepath.Base(buildPath), toWindowsPath(insideAgentDir), err, stdOut, errOut)
+			}
+		}
+	}
+
+	return nil
+}
+
+// Run the batch of tests on the host.
+func (WindowsRunner) Run(ctx context.Context, verbose bool, c ssh.SSHClient, logger common.Logger, agentVersion string, prefix string, batch define.Batch, env map[string]string) (common.OSRunnerResult, error) {
+	var tests []string
+	for _, pkg := range batch.Tests {
+		for _, test := range pkg.Tests {
+			tests = append(tests, fmt.Sprintf("%s:%s", pkg.Name, test.Name))
+		}
+	}
+	var sudoTests []string
+	for _, pkg := range batch.SudoTests {
+		for _, test := range pkg.Tests {
+			sudoTests = append(sudoTests, fmt.Sprintf("%s:%s", pkg.Name, test.Name))
+		}
+	}
+
+	var result common.OSRunnerResult
+	if len(tests) > 0 {
+		script := toPowershellScript(agentVersion, prefix, verbose, tests, env)
+
+		// fetch results for the non-sudo packages that were just run
+		// (batch.Tests, not batch.SudoTests)
+		results, err := runTestsOnWindows(ctx, logger, "non-sudo", prefix, script, c, batch.Tests)
+		if err != nil {
+			return common.OSRunnerResult{}, fmt.Errorf("error running non-sudo tests: %w", err)
+		}
+		result.Packages = results
+	}
+
+	if len(sudoTests) > 0 {
+		prefix := fmt.Sprintf("%s-sudo", prefix)
+		script := toPowershellScript(agentVersion, prefix, verbose, sudoTests, env)
+
+		results, err := runTestsOnWindows(ctx, logger, "sudo", prefix, script, c, batch.SudoTests)
+		if err != nil {
+			return common.OSRunnerResult{}, fmt.Errorf("error running sudo tests: %w", err)
+		}
+		result.SudoPackages = results
+	}
+	return result, nil
+}
+
+// Diagnostics gathers any diagnostics from the host.
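+// It lists agent\build\diagnostics with `dir /b` and streams each file in it
+// back over the SSH connection using `type`, writing the copy under the
+// destination directory on the local machine.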
+func (WindowsRunner) Diagnostics(ctx context.Context, sshClient ssh.SSHClient, logger common.Logger, destination string) error { + diagnosticDir := "agent\\build\\diagnostics" + stdOut, _, err := sshClient.Exec(ctx, "dir", []string{diagnosticDir, "/b"}, nil) + if err != nil { + //nolint:nilerr // failed to list the directory, probably don't have any diagnostics (do nothing) + return nil + } + eachDiagnostic := strings.Split(string(stdOut), "\n") + for _, filename := range eachDiagnostic { + filename = strings.TrimSpace(filename) + if filename == "" { + continue + } + + // don't use filepath.Join as we need this to work in Linux/Darwin as well + // this is because if we use `filepath.Join` on a Linux/Darwin host connected to a Windows host + // it will use a `/` and that will be incorrect for Windows + fp := fmt.Sprintf("%s\\%s", diagnosticDir, filename) + // use filepath.Join on this path because it's a path on this specific host platform + dp := filepath.Join(destination, filename) + logger.Logf("Copying diagnostic %s", filename) + out, err := os.Create(dp) + if err != nil { + return fmt.Errorf("failed to create file %s: %w", dp, err) + } + err = sshClient.GetFileContentsOutput(ctx, fp, out, ssh.WithContentFetchCommand("type")) + _ = out.Close() + if err != nil { + return fmt.Errorf("failed to copy file from remote host to %s: %w", dp, err) + } + } + return nil +} + +func sshRunPowershell(ctx context.Context, sshClient ssh.SSHClient, cmd string) ([]byte, []byte, error) { + return sshClient.ExecWithRetry(ctx, "powershell", []string{ + "-NoProfile", + "-InputFormat", "None", + "-ExecutionPolicy", "Bypass", + "-Command", cmd, + }, 15*time.Second) +} + +func toPowershellScript(agentVersion string, prefix string, verbose bool, tests []string, env map[string]string) string { + var sb strings.Builder + for k, v := range env { + sb.WriteString("$env:") + sb.WriteString(k) + sb.WriteString("=\"") + sb.WriteString(v) + sb.WriteString("\"\n") + } + sb.WriteString("$env:AGENT_VERSION=\"") + sb.WriteString(agentVersion) + sb.WriteString("\"\n") + sb.WriteString("$env:TEST_DEFINE_PREFIX=\"") + sb.WriteString(prefix) + sb.WriteString("\"\n") + sb.WriteString("$env:TEST_DEFINE_TESTS=\"") + sb.WriteString(strings.Join(tests, ",")) + sb.WriteString("\"\n") + sb.WriteString("cd agent\n") + sb.WriteString("mage ") + if verbose { + sb.WriteString("-v ") + } + sb.WriteString("integration:testOnRemote\n") + return sb.String() +} + +func runTestsOnWindows(ctx context.Context, logger common.Logger, name string, prefix string, script string, sshClient ssh.SSHClient, tests []define.BatchPackageTests) ([]common.OSRunnerPackageResult, error) { + execTest := strings.NewReader(script) + + session, err := sshClient.NewSession() + if err != nil { + return nil, fmt.Errorf("failed to start session: %w", err) + } + + session.Stdout = common.NewPrefixOutput(logger, fmt.Sprintf("Test output (%s) (stdout): ", name)) + session.Stderr = common.NewPrefixOutput(logger, fmt.Sprintf("Test output (%s) (stderr): ", name)) + session.Stdin = execTest + // allowed to fail because tests might fail + logger.Logf("Running %s tests...", name) + err = session.Run("powershell -noprofile -noninteractive -") + if err != nil { + logger.Logf("%s tests failed: %s", name, err) + } + // this seems to always return an error + _ = session.Close() + + var result []common.OSRunnerPackageResult + // fetch the contents for each package + for _, pkg := range tests { + resultPkg, err := getWindowsRunnerPackageResult(ctx, sshClient, pkg, prefix) + if err != 
nil {
+			return nil, err
+		}
+		result = append(result, resultPkg)
+	}
+	return result, nil
+}
+
+func toWindowsPath(path string) string {
+	return strings.ReplaceAll(path, "/", "\\")
+}
+
+func getWindowsRunnerPackageResult(ctx context.Context, sshClient ssh.SSHClient, pkg define.BatchPackageTests, prefix string) (common.OSRunnerPackageResult, error) {
+	var err error
+	var resultPkg common.OSRunnerPackageResult
+	resultPkg.Name = pkg.Name
+	outputPath := fmt.Sprintf("%%home%%\\agent\\build\\TEST-go-remote-%s.%s", prefix, filepath.Base(pkg.Name))
+	resultPkg.Output, err = sshClient.GetFileContents(ctx, outputPath+".out", ssh.WithContentFetchCommand("type"))
+	if err != nil {
+		return common.OSRunnerPackageResult{}, fmt.Errorf("failed to fetch test output at %s.out: %w", outputPath, err)
+	}
+	resultPkg.JSONOutput, err = sshClient.GetFileContents(ctx, outputPath+".out.json", ssh.WithContentFetchCommand("type"))
+	if err != nil {
+		return common.OSRunnerPackageResult{}, fmt.Errorf("failed to fetch test output at %s.out.json: %w", outputPath, err)
+	}
+	resultPkg.XMLOutput, err = sshClient.GetFileContents(ctx, outputPath+".xml", ssh.WithContentFetchCommand("type"))
+	if err != nil {
+		return common.OSRunnerPackageResult{}, fmt.Errorf("failed to fetch test output at %s.xml: %w", outputPath, err)
+	}
+	return resultPkg, nil
+}
diff --git a/dev-tools/mage/target/srvrlesstest/utils/root_unix.go b/dev-tools/mage/target/srvrlesstest/utils/root_unix.go
new file mode 100755
index 000000000000..d410c5de16f1
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/utils/root_unix.go
@@ -0,0 +1,33 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//go:build !windows
+
+package utils
+
+import "os"
+
+const (
+	// PermissionUser is the permission level the user needs to be.
+	PermissionUser = "root"
+)
+
+// HasRoot returns true if the user has root permissions.
+// The extra nil return keeps the signature consistent with the Windows HasRoot, which can return an error.
+func HasRoot() (bool, error) {
+	return os.Geteuid() == 0, nil
+}
diff --git a/dev-tools/mage/target/srvrlesstest/utils/root_windows.go b/dev-tools/mage/target/srvrlesstest/utils/root_windows.go
new file mode 100644
index 000000000000..d3a83e32005e
--- /dev/null
+++ b/dev-tools/mage/target/srvrlesstest/utils/root_windows.go
@@ -0,0 +1,59 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//go:build windows + +package utils + +import ( + "fmt" + + "golang.org/x/sys/windows" +) + +const ( + // PermissionUser is the permission level the user needs to be. + PermissionUser = "Administrator" +) + +// HasRoot returns true if the user has Administrator/SYSTEM permissions. +func HasRoot() (bool, error) { + var sid *windows.SID + // See https://docs.microsoft.com/en-us/windows/desktop/api/securitybaseapi/nf-securitybaseapi-checktokenmembership for more on the api + err := windows.AllocateAndInitializeSid( + &windows.SECURITY_NT_AUTHORITY, + 2, + windows.SECURITY_BUILTIN_DOMAIN_RID, + windows.DOMAIN_ALIAS_RID_ADMINS, + 0, 0, 0, 0, 0, 0, + &sid) + if err != nil { + return false, fmt.Errorf("allocate sid error: %w", err) + } + defer func() { + _ = windows.FreeSid(sid) + }() + + token := windows.Token(0) + + member, err := token.IsMember(sid) + if err != nil { + return false, fmt.Errorf("token membership error: %w", err) + } + + return member, nil +} diff --git a/dev-tools/mage/target/srvrlesstest/utils/root_windows_test.go b/dev-tools/mage/target/srvrlesstest/utils/root_windows_test.go new file mode 100644 index 000000000000..b8cd3080ea8b --- /dev/null +++ b/dev-tools/mage/target/srvrlesstest/utils/root_windows_test.go @@ -0,0 +1,33 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
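+//
+// The test below only asserts that HasRoot does not error; the boolean result
+// depends on the privileges of the account running the test.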
+ +//go:build windows + +package utils + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestHasRoot(t *testing.T) { + t.Run("check if user is admin", func(t *testing.T) { + _, err := HasRoot() + assert.NoError(t, err) + }) +} diff --git a/go.mod b/go.mod index 3e2fe304b676..a496d32f4172 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/elastic/beats/v7 -go 1.22.0 +go 1.22.3 + +toolchain go1.22.7 require ( cloud.google.com/go/bigquery v1.62.0 @@ -155,9 +157,9 @@ require ( gopkg.in/yaml.v2 v2.4.0 gotest.tools/gotestsum v1.7.0 howett.net/plist v1.0.1 - k8s.io/api v0.29.5 - k8s.io/apimachinery v0.29.5 - k8s.io/client-go v0.29.5 + k8s.io/api v0.31.1 + k8s.io/apimachinery v0.31.1 + k8s.io/client-go v0.31.1 kernel.org/pub/linux/libs/security/libcap/cap v1.2.57 ) @@ -204,7 +206,7 @@ require ( github.com/go-ldap/ldap/v3 v3.4.6 github.com/gofrs/uuid/v5 v5.2.0 github.com/golang-jwt/jwt/v5 v5.2.1 - github.com/google/cel-go v0.19.0 + github.com/google/cel-go v0.20.1 github.com/googleapis/gax-go/v2 v2.13.0 github.com/gorilla/handlers v1.5.1 github.com/gorilla/mux v1.8.0 @@ -230,6 +232,7 @@ require ( golang.org/x/term v0.24.0 google.golang.org/genproto/googleapis/api v0.0.0-20240725223205-93522f1f2a9f gopkg.in/natefinch/lumberjack.v2 v2.2.1 + sigs.k8s.io/e2e-framework v0.5.0 ) require ( @@ -289,9 +292,10 @@ require ( github.com/elazarl/goproxy v0.0.0-20240909085733-6741dbfc16a1 // indirect github.com/elazarl/goproxy/ext v0.0.0-20240909085733-6741dbfc16a1 // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect - github.com/evanphx/json-patch v5.6.0+incompatible // indirect + github.com/evanphx/json-patch/v5 v5.9.0 // indirect github.com/fearful-symmetry/gomsr v0.0.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/fxamacker/cbor/v2 v2.7.0 // indirect github.com/go-asn1-ber/asn1-ber v1.5.5 // indirect github.com/go-logfmt/logfmt v0.6.0 // indirect github.com/go-logr/logr v1.4.2 // indirect @@ -349,7 +353,7 @@ require ( github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/mitchellh/iochan v1.0.0 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect - github.com/moby/spdystream v0.2.0 // indirect + github.com/moby/spdystream v0.4.0 // indirect github.com/moby/sys/userns v0.1.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect @@ -370,6 +374,7 @@ require ( github.com/stretchr/objx v0.5.2 // indirect github.com/tklauser/numcpus v0.4.0 // indirect github.com/vishvananda/netlink v1.2.1-beta.2 // indirect + github.com/x448/float16 v0.8.4 // indirect github.com/xdg-go/pbkdf2 v1.0.0 // indirect github.com/xdg-go/stringprep v1.0.4 // indirect github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a // indirect @@ -389,15 +394,17 @@ require ( golang.org/x/exp v0.0.0-20240205201215-2c58cdc269a3 // indirect golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240822170219-fc7c04adadcd // indirect + gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect - k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect + k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect kernel.org/pub/linux/libs/security/libcap/psx v1.2.57 
// indirect mvdan.cc/garble v0.12.1 // indirect + sigs.k8s.io/controller-runtime v0.19.0 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect - sigs.k8s.io/yaml v1.3.0 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect ) require ( diff --git a/go.sum b/go.sum index ba2722f5baab..81243a2d0d8e 100644 --- a/go.sum +++ b/go.sum @@ -242,6 +242,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/blakesmith/ar v0.0.0-20150311145944-8bd4349a67f2 h1:oMCHnXa6CCCafdPDbMh/lWRhRByN0VFLvv+g+ayx1SI= github.com/blakesmith/ar v0.0.0-20150311145944-8bd4349a67f2/go.mod h1:PkYb9DJNAwrSvRx5DYA+gUcOIgTGVMNkfSCbZM8cWpI= +github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= +github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/bluekeyes/go-gitdiff v0.7.1 h1:graP4ElLRshr8ecu0UtqfNTCHrtSyZd3DABQm/DWesQ= github.com/bluekeyes/go-gitdiff v0.7.1/go.mod h1:QpfYYO1E0fTVHVZAZKiRjtSGY9823iCdvGXBcEzHGbM= github.com/bsm/sarama-cluster v2.1.14-0.20180625083203-7e67d87a6b3f+incompatible h1:4g18+HnTDwEtO0n7K8B1Kjq+04MEKJRkhJNQ/hb9d5A= @@ -407,8 +409,8 @@ github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymF github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= -github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= +github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= @@ -427,6 +429,8 @@ github.com/foxcpp/go-mockdns v0.0.0-20201212160233-ede2f9158d15/go.mod h1:tPg4cp github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= +github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= +github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= github.com/go-asn1-ber/asn1-ber v1.5.5 h1:MNHlNMBDgEKD4TcKr36vQN68BA00aDfjIt3/bD50WnA= github.com/go-asn1-ber/asn1-ber v1.5.5/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0= github.com/go-faker/faker/v4 v4.2.0 h1:dGebOupKwssrODV51E0zbMrv5e2gO9VWSLNC1WDCpWg= @@ -447,6 +451,8 @@ github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= 
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab h1:xveKWz2iaueeTaUgdetzel+U7exyigDYBryyVfV/rZk= github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AEU963A2AYjv4d1V5eVL1CQbEJq6aCNHDDjibzu8= github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= @@ -468,7 +474,8 @@ github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfC github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gocarina/gocsv v0.0.0-20170324095351-ffef3ffc77be h1:zXHeEEJ231bTf/IXqvCfeaqjLpXsq42ybLoT4ROSR6Y= github.com/gocarina/gocsv v0.0.0-20170324095351-ffef3ffc77be/go.mod h1:/oj50ZdPq/cUjA02lMZhijk5kR31SEydKyqah1OgBuo= github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= @@ -522,8 +529,8 @@ github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/gomodule/redigo v1.8.3 h1:HR0kYDX2RJZvAup8CsiJwxB4dTCSC0AaUq6S4SiLwUc= github.com/gomodule/redigo v1.8.3/go.mod h1:P9dn9mFrCBvWhGE1wpxx6fgq7BAeLBk+UUUzlpkBYO0= -github.com/google/cel-go v0.19.0 h1:vVgaZoHPBDd1lXCYGQOh5A06L4EtuIfmqQ/qnSXSKiU= -github.com/google/cel-go v0.19.0/go.mod h1:kWcIzTsPX0zmQ+H3TirHstLLf9ep5QTsZBN9u4dOYLg= +github.com/google/cel-go v0.20.1 h1:nDx9r8S3L4pE61eDdt8igGj8rf5kjYR3ILxWIpWNi84= +github.com/google/cel-go v0.20.1/go.mod h1:kWcIzTsPX0zmQ+H3TirHstLLf9ep5QTsZBN9u4dOYLg= github.com/google/flatbuffers v23.5.26+incompatible h1:M9dgRyhJemaM4Sw8+66GHBu8ioaQmyPLg1b8VwK5WJg= github.com/google/flatbuffers v23.5.26+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= @@ -729,8 +736,8 @@ github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyua github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= -github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8= -github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c= +github.com/moby/spdystream v0.4.0 h1:Vy79D6mHeJJjiPdFEL2yku1kl0chZpJfZcPpb16BRl8= +github.com/moby/spdystream v0.4.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g= github.com/moby/sys/userns v0.1.0/go.mod 
h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= @@ -760,11 +767,11 @@ github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/onsi/ginkgo v1.6.0 h1:Ix8l273rp3QzYgXSR+c8d1fTG7UPgYkOSELPhiY/YGw= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo/v2 v2.17.1 h1:V++EzdbhI4ZV4ev0UTIj0PzhzOcReJFyJaLjtSF55M8= -github.com/onsi/ginkgo/v2 v2.17.1/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs= +github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= +github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= -github.com/onsi/gomega v1.33.0 h1:snPCflnZrpMsy94p4lXVEkHo12lmPnc3vY5XBbreexE= -github.com/onsi/gomega v1.33.0/go.mod h1:+925n5YtiFsLzzafLUHzVMBpvvRAzrydIBiSIxjX3wY= +github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= +github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= @@ -890,8 +897,12 @@ github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17 github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f h1:p4VB7kIXpOQvVn1ZaTIVp+3vuYAXFe3OJEvjbUYJLaA= github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= +github.com/vladimirvivien/gexe v0.3.0 h1:4xwiOwGrDob5OMR6E92B9olDXYDglXdHhzR1ggYtWJM= +github.com/vladimirvivien/gexe v0.3.0/go.mod h1:fp7cy60ON1xjhtEI/+bfSEIXX35qgmI+iRYlGOqbBFM= github.com/vmware/govmomi v0.39.0 h1:soLZ08Q2zvjRSinNup8xVlw0KDDCJPPA1rIDmBhi7As= github.com/vmware/govmomi v0.39.0/go.mod h1:oHzAQ1r6152zYDGcUqeK+EO8LhKo5wjtvWZBGHws2Hc= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= @@ -1225,6 +1236,8 @@ gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= +gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod 
h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/h2non/gock.v1 v1.1.2 h1:jBbHXgGBK/AoPVfJh5x4r/WxIrElvbLel8TCZkkZJoY= @@ -1269,18 +1282,22 @@ honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= howett.net/plist v1.0.1 h1:37GdZ8tP09Q35o9ych3ehygcsL+HqKSwzctveSlarvM= howett.net/plist v1.0.1/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g= -k8s.io/api v0.29.5 h1:levS+umUigHCfI3riD36pMY1vQEbrzh4r1ivVWAhHaI= -k8s.io/api v0.29.5/go.mod h1:7b18TtPcJzdjk7w5zWyIHgoAtpGeRvGGASxlS7UZXdQ= -k8s.io/apimachinery v0.29.5 h1:Hofa2BmPfpoT+IyDTlcPdCHSnHtEQMoJYGVoQpRTfv4= -k8s.io/apimachinery v0.29.5/go.mod h1:i3FJVwhvSp/6n8Fl4K97PJEP8C+MM+aoDq4+ZJBf70Y= -k8s.io/client-go v0.29.5 h1:nlASXmPQy190qTteaVP31g3c/wi2kycznkTP7Sv1zPc= -k8s.io/client-go v0.29.5/go.mod h1:aY5CnqUUvXYccJhm47XHoPcRyX6vouHdIBHaKZGTbK4= +k8s.io/api v0.31.1 h1:Xe1hX/fPW3PXYYv8BlozYqw63ytA92snr96zMW9gWTU= +k8s.io/api v0.31.1/go.mod h1:sbN1g6eY6XVLeqNsZGLnI5FwVseTrZX7Fv3O26rhAaI= +k8s.io/apiextensions-apiserver v0.31.0 h1:fZgCVhGwsclj3qCw1buVXCV6khjRzKC5eCFt24kyLSk= +k8s.io/apiextensions-apiserver v0.31.0/go.mod h1:b9aMDEYaEe5sdK+1T0KU78ApR/5ZVp4i56VacZYEHxk= +k8s.io/apimachinery v0.31.1 h1:mhcUBbj7KUjaVhyXILglcVjuS4nYXiwC+KKFBgIVy7U= +k8s.io/apimachinery v0.31.1/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= +k8s.io/client-go v0.31.1 h1:f0ugtWSbWpxHR7sjVpQwuvw9a3ZKLXX0u0itkFXufb0= +k8s.io/client-go v0.31.1/go.mod h1:sKI8871MJN2OyeqRlmA4W4KM9KBdBUpDLu/43eGemCg= +k8s.io/component-base v0.31.1 h1:UpOepcrX3rQ3ab5NB6g5iP0tvsgJWzxTyAo20sgYSy8= +k8s.io/component-base v0.31.1/go.mod h1:WGeaw7t/kTsqpVTaCoVEtillbqAhF2/JgvO0LDOMa0w= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= -k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= -k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= kernel.org/pub/linux/libs/security/libcap/cap v1.2.57 h1:2nmqI+aw7EQZuelYktkQHBE4jESD2tOR+lOJEnv/Apo= kernel.org/pub/linux/libs/security/libcap/cap v1.2.57/go.mod h1:uI99C3r4SXvJeuqoEtx/eWt7UbmfqqZ80H8q+9t/A7I= kernel.org/pub/linux/libs/security/libcap/psx v1.2.57 h1:NOFATXSf5z/cMR3HIwQ3Xrd3nwnWl5xThmNr5U/F0pI= @@ -1289,9 +1306,13 @@ mvdan.cc/garble v0.12.1 h1:GyKeyqr4FKhWz12ZD9kKT9VnDqFILVYxgmAE8RKd3x8= mvdan.cc/garble v0.12.1/go.mod h1:rJ4GvtUEuVCRAYQkpd1iG6bolz9NEnkk0iu6gdTwWqA= nhooyr.io/websocket v1.8.11 h1:f/qXNc2/3DpoSZkHt1DQu6rj4zGC8JmkkLkWss0MgN0= nhooyr.io/websocket v1.8.11/go.mod h1:rN9OFWIUwuxg4fR5tELlYC04bXYowCP9GX47ivo2l+c= +sigs.k8s.io/controller-runtime v0.19.0 h1:nWVM7aq+Il2ABxwiCizrVDSlmDcshi9llbaFbC0ji/Q= +sigs.k8s.io/controller-runtime v0.19.0/go.mod h1:iRmWllt8IlaLjvTTDLhRBXIEtkCK6hwVBJJsYS9Ajf4= +sigs.k8s.io/e2e-framework v0.5.0 h1:YLhk8R7EHuTFQAe6Fxy5eBzn5Vb+yamR5u8MH1Rq3cE= +sigs.k8s.io/e2e-framework v0.5.0/go.mod h1:jJSH8u2RNmruekUZgHAtmRjb5Wj67GErli9UjLSY7Zc= sigs.k8s.io/json 
v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= -sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= -sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/x-pack/agentbeat/magefile.go b/x-pack/agentbeat/magefile.go index bd72a558ba39..44aeb4a1e2cf 100644 --- a/x-pack/agentbeat/magefile.go +++ b/x-pack/agentbeat/magefile.go @@ -9,11 +9,9 @@ package main import ( "context" "fmt" - "log" + "github.com/elastic/beats/v7/dev-tools/mage/target/srvrlesstest" "os" - "os/exec" "path/filepath" - "strings" "time" "github.com/magefile/mage/sh" @@ -217,78 +215,8 @@ func PythonIntegTest(ctx context.Context) error { return devtools.PythonIntegTestFromHost(devtools.DefaultPythonTestIntegrationFromHostArgs()) } -// TestWithSpec executes unique commands from agentbeat.spec.yml and validates that app haven't exited with non-zero -func TestWithSpec(ctx context.Context) { - specPath := os.Getenv("AGENTBEAT_SPEC") - if specPath == "" { - log.Fatal("AGENTBEAT_SPEC is not defined\n") - } - - platform := os.Getenv("PLATFORM") - if platform == "" { - log.Fatal("PLATFORM is not defined\n") - } - - var commands = devtools.SpecCommands(specPath, platform) - - agentbeatPath := os.Getenv("AGENTBEAT_PATH") - - cmdResults := make(map[string]bool) - - for _, command := range commands { - cmdResults[command] = runCmd(agentbeatPath, strings.Split(command, " ")) - } - - hasFailures := false - for cmd, res := range cmdResults { - if res { - fmt.Printf("--- :large_green_circle: Succeeded: [%s.10s...]\n", cmd) - } else { - fmt.Printf("--- :bangbang: Failed: [%s.10s...]\n", cmd) - hasFailures = true - } - } - - if hasFailures { - fmt.Printf("Some inputs failed. Exiting with error\n") - os.Exit(1) - } -} - -func runCmd(agentbeatPath string, command []string) bool { - cmd := exec.Command(agentbeatPath, command...) 
- fmt.Printf("Executing: %s\n", cmd.String()) - - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - cmd.Stdin = os.Stdin - - if err := cmd.Start(); err != nil { - fmt.Printf("failed to start command: %v\n", err) - } - - defer func() { - if err := cmd.Process.Kill(); err != nil { - fmt.Printf("failed to kill process: %v\n", err) - } else { - fmt.Print("command process killed\n") - } - }() - - done := make(chan error, 1) - go func() { - done <- cmd.Wait() - }() - timeout := 2 * time.Second - deadline := time.After(timeout) - - select { - case err := <-done: - fmt.Printf("command exited before %s: %v\n", timeout.String(), err) - return false - - case <-deadline: - fmt.Printf("%s\n", cmd.Stdout) - return true - } +// ServerlessTest starts serverless integration tests +func ServerlessTest(ctx context.Context, beat string) error { + devtools.TestBeatServerless(beat) + return srvrlesstest.IntegRunner(ctx, false, "TestBeatsServerless") } diff --git a/x-pack/agentbeat/testing/integration/README.md b/x-pack/agentbeat/testing/integration/README.md new file mode 100644 index 000000000000..5f6a6693b5f5 --- /dev/null +++ b/x-pack/agentbeat/testing/integration/README.md @@ -0,0 +1 @@ +See [test-framework-dev-guide.md](../../docs/test-framework-dev-guide.md) diff --git a/x-pack/agentbeat/testing/integration/agent_long_running_leak_test.go b/x-pack/agentbeat/testing/integration/agent_long_running_leak_test.go new file mode 100644 index 000000000000..22b1e31cb15e --- /dev/null +++ b/x-pack/agentbeat/testing/integration/agent_long_running_leak_test.go @@ -0,0 +1,406 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
+ +//go:build integration + +package integration + +import ( + "context" + "encoding/json" + "io" + "net" + "net/http" + "os" + "os/exec" + "regexp" + "runtime" + "strconv" + "strings" + "testing" + "time" + + "github.com/gofrs/uuid/v5" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/elastic/elastic-agent-libs/api/npipe" + "github.com/elastic/elastic-agent-libs/kibana" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/pkg/control/v2/cproto" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools" + "github.com/elastic/elastic-agent/pkg/testing/tools/estools" + "github.com/elastic/elastic-agent/pkg/utils" + "github.com/elastic/go-sysinfo" + "github.com/elastic/go-sysinfo/types" +) + +// ExtendedRunner is the main test runner +type ExtendedRunner struct { + suite.Suite + info *define.Info + agentFixture *atesting.Fixture + ESHost string + healthCheckTime time.Duration + healthCheckRefreshTime time.Duration + + resourceWatchers []StatusWatcher +} + +// BeatStats is used to parse the result of a /stats call to the beat control socket +type BeatStats struct { + Beat struct { + Runtime struct { + Goroutines int `json:"goroutines"` + } `json:"runtime"` + } `json:"beat"` +} + +func TestLongRunningAgentForLeaks(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: "fleet", + Stack: &define.Stack{}, + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + OS: []define.OS{ + {Type: define.Linux}, + {Type: define.Windows}, + }, + }) + + if os.Getenv("TEST_LONG_RUNNING") == "" { + t.Skip("not running extended test unless TEST_LONG_RUNNING is set") + } + + suite.Run(t, &ExtendedRunner{info: info, + healthCheckTime: time.Minute * 6, + healthCheckRefreshTime: time.Second * 20, + resourceWatchers: []StatusWatcher{ // select which tests to run + &handleMonitor{}, + &goroutinesMonitor{}, + }}) +} + +func (runner *ExtendedRunner) SetupSuite() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + cmd := exec.CommandContext(ctx, "go", "install", "-v", "github.com/mingrammer/flog@latest") + out, err := cmd.CombinedOutput() + require.NoError(runner.T(), err, "got out: %s", string(out)) + + cmd = exec.CommandContext(ctx, "flog", "-t", "log", "-f", "apache_error", "-o", "/var/log/httpd/error_log", "-b", "50485760", "-p", "1048576") + out, err = cmd.CombinedOutput() + require.NoError(runner.T(), err, "got out: %s", string(out)) + + policyUUID := uuid.Must(uuid.NewV4()).String() + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Force: true, + Privileged: true, + } + + fixture, err := define.NewFixtureFromLocalBuild(runner.T(), define.Version()) + require.NoError(runner.T(), err) + runner.agentFixture = fixture + + basePolicy := kibana.AgentPolicy{ + Name: "test-policy-" + policyUUID, + Namespace: "default", + Description: "Test policy " + policyUUID, + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + } + + policyResp, err := tools.InstallAgentWithPolicy(ctx, runner.T(), installOpts, runner.agentFixture, runner.info.KibanaClient, basePolicy) + require.NoError(runner.T(), err) + + _, err = tools.InstallPackageFromDefaultFile(ctx, 
runner.info.KibanaClient, "system", "1.53.1", "agent_long_test_base_system_integ.json", uuid.Must(uuid.NewV4()).String(), policyResp.ID)
+	require.NoError(runner.T(), err)
+
+	_, err = tools.InstallPackageFromDefaultFile(ctx, runner.info.KibanaClient, "apache", "1.17.0", "agent_long_test_apache.json", uuid.Must(uuid.NewV4()).String(), policyResp.ID)
+	require.NoError(runner.T(), err)
+}
+
+func (runner *ExtendedRunner) TestHandleLeak() {
+	ctx, cancel := context.WithTimeout(context.Background(), time.Hour)
+	defer cancel()
+
+	testRuntime := os.Getenv("LONG_TEST_RUNTIME")
+	if testRuntime == "" {
+		testRuntime = "15m"
+	}
+
+	// block until we're sure agent is healthy
+	runner.CheckHealthAtStartup(ctx)
+
+	// initialize the resource watchers that will sit and look for usage patterns
+	for _, mon := range runner.resourceWatchers {
+		mon.Init(ctx, runner.T(), runner.agentFixture)
+	}
+
+	testDuration, err := time.ParseDuration(testRuntime)
+	require.NoError(runner.T(), err)
+
+	timer := time.NewTimer(testDuration)
+	defer timer.Stop()
+
+	ticker := time.NewTicker(time.Second * 10)
+	defer ticker.Stop()
+
+	done := false
+	for !done {
+		select {
+		case <-timer.C:
+			done = true
+		case <-ticker.C:
+			err := runner.agentFixture.IsHealthy(ctx)
+			require.NoError(runner.T(), err)
+			// iterate through our watchers, update them
+			for _, mon := range runner.resourceWatchers {
+				mon.Update(runner.T(), runner.agentFixture)
+			}
+		}
+	}
+
+	// we're measuring the resource usage as y=mx+b and failing the test if the
+	// slope m rises above a certain rate. A number of factors can change the
+	// slope during a test: a shortened runtime (lots of handles allocated in
+	// the first few seconds, producing an upward slope), filebeat trying to
+	// open a large number of log files, etc.
+	for _, mon := range runner.resourceWatchers {
+		handleSlopeFailure := 0.1
+
+		for _, handle := range mon.GetSlopeHandlers() {
+			err := handle.Run()
+			require.NoError(runner.T(), err)
+			runner.T().Logf("=============================== %s", handle.Name())
+			handleSlope := handle.GetSlope()
+			require.LessOrEqual(runner.T(), handleSlope, handleSlopeFailure, "rate of increase in resource count exceeded threshold for %s: %s", handle.Name(), handle.Debug())
+			runner.T().Logf("Passed check for %s; component: %s", mon.Name(), handle.Name())
+			runner.T().Logf("===============================")
+		}
+	}
+
+	status, err := runner.agentFixture.ExecStatus(ctx)
+	require.NoError(runner.T(), err)
+
+	// post-test: make sure that we actually ingested logs.
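+	// both the apache.error and system.cpu data streams are queried; zero hits
+	// for either means the integrations never shipped data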
+	docs, err := estools.GetResultsForAgentAndDatastream(ctx, runner.info.ESClient, "apache.error", status.Info.ID)
+	assert.NoError(runner.T(), err, "error fetching apache logs")
+	assert.Greater(runner.T(), docs.Hits.Total.Value, 0, "could not find any matching apache logs for agent ID %s", status.Info.ID)
+	runner.T().Logf("Generated %d apache logs", docs.Hits.Total.Value)
+
+	docs, err = estools.GetResultsForAgentAndDatastream(ctx, runner.info.ESClient, "system.cpu", status.Info.ID)
+	assert.NoError(runner.T(), err, "error fetching system metrics")
+	assert.Greater(runner.T(), docs.Hits.Total.Value, 0, "could not find any matching system metrics for agent ID %s", status.Info.ID)
+	runner.T().Logf("Generated %d system events", docs.Hits.Total.Value)
+}
+
+// CheckHealthAtStartup ensures all the beats and agent are healthy and working before we continue
+func (runner *ExtendedRunner) CheckHealthAtStartup(ctx context.Context) {
+	// because we need to separately fetch the PIDs, wait until everything is healthy before we look for running beats
+	compDebugName := ""
+	require.Eventually(runner.T(), func() bool {
+		allHealthy := true
+		status, err := runner.agentFixture.ExecStatus(ctx)
+		if err != nil {
+			runner.T().Logf("agent status returned an error: %v", err)
+			return false
+		}
+
+		apacheMatch := "logfile-apache"
+		foundApache := false
+		systemMatch := "system/metrics"
+		foundSystem := false
+
+		for _, comp := range status.Components {
+			// make sure the components include the expected integrations
+			for _, v := range comp.Units {
+				runner.T().Logf("unit ID: %s", v.UnitID)
+				// the full unit ID will be something like "log-default-logfile-cef-3f0764f0-4ade-4f46-9ead-f2f0f7865676"
+				if !foundApache && strings.Contains(v.UnitID, apacheMatch) {
+					foundApache = true
+				}
+				if !foundSystem && strings.Contains(v.UnitID, systemMatch) {
+					foundSystem = true
+				}
+				runner.T().Logf("unit state: %s", v.Message)
+				if v.State != int(cproto.State_HEALTHY) {
+					allHealthy = false
+				}
+			}
+			runner.T().Logf("component state: %s", comp.Message)
+			if comp.State != int(cproto.State_HEALTHY) {
+				compDebugName = comp.Name
+				allHealthy = false
+			}
+		}
+		return allHealthy && foundApache && foundSystem
+	}, runner.healthCheckTime, runner.healthCheckRefreshTime, "install never became healthy: components did not return a healthy state: %s", compDebugName)
+}
+
+/*
+=============================================================================
+Watchers for checking resource usage
+=============================================================================
+*/
+
+type StatusWatcher interface {
+	Init(ctx context.Context, t *testing.T, fixture *atesting.Fixture)
+	Update(t *testing.T, fixture *atesting.Fixture)
+	GetSlopeHandlers() []tools.Slope
+	Name() string
+}
+
+// goroutineWatcher polls the stats endpoint of a single component under test
+type goroutineWatcher struct {
+	httpClient    http.Client
+	regGoroutines tools.Slope
+	compName      string
+}
+
+// goroutinesMonitor tracks goroutine counts across the agent's components
+type goroutinesMonitor struct {
+	handles   []goroutineWatcher
+	startTime time.Time
+}
+
+func (gm *goroutinesMonitor) Init(ctx context.Context, t *testing.T, fixture *atesting.Fixture) {
+	oldTop := paths.Top()
+	paths.SetTop("/opt/Elastic/Agent")
+	// fetch the unit ID of the component, use that to generate the path to the unix socket
+	status, err := fixture.ExecStatus(ctx)
+	if err != nil {
+		t.Logf("agent status returned an error: %v", err)
+	}
+
+	for _, comp := range status.Components {
+		unitId := comp.ID
+		socketPath := utils.SocketURLWithFallback(unitId, paths.TempDir())
+		handlesReg := tools.NewSlope(comp.Name)
+		watcher := goroutineWatcher{
+			regGoroutines: handlesReg,
+			compName:      comp.Name,
+			httpClient: http.Client{
+				Transport: &http.Transport{
+					DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
+						if runtime.GOOS != "windows" {
+							path := strings.Replace(socketPath, "unix://", "", -1)
+							return net.Dial("unix", path)
+						} else {
+							if strings.HasPrefix(socketPath, "npipe:///") {
+								path := strings.TrimPrefix(socketPath, "npipe:///")
+								socketPath = `\\.\pipe\` + path
+							}
+							return npipe.DialContext(socketPath)(ctx, "", "")
+						}
+					},
+				},
+			},
+		}
+		gm.handles = append(gm.handles, watcher)
+	}
+	gm.startTime = time.Now()
+	paths.SetTop(oldTop)
+}
+
+func (gm *goroutinesMonitor) Update(t *testing.T, fixture *atesting.Fixture) {
+	// reach out to the unix sockets to get the raw stats, which include a count of goroutines
+	for _, comp := range gm.handles {
+		resp, err := comp.httpClient.Get("http://unix/stats")
+		require.NoError(t, err)
+		respRaw, err := io.ReadAll(resp.Body)
+		require.NoError(t, err)
+		data := BeatStats{}
+		err = json.Unmarshal(respRaw, &data)
+		require.NoError(t, err)
+		resp.Body.Close()
+
+		comp.regGoroutines.AddDatapoint(float64(data.Beat.Runtime.Goroutines), time.Since(gm.startTime))
+	}
+}
+
+func (gm *goroutinesMonitor) GetSlopeHandlers() []tools.Slope {
+	slopes := []tools.Slope{}
+	for _, handle := range gm.handles {
+		slopes = append(slopes, handle.regGoroutines)
+	}
+	return slopes
+}
+
+func (gm *goroutinesMonitor) Name() string {
+	return "goroutines"
+}
+
+// processWatcher tracks the handle counts of an individual running beat.
+type processWatcher struct {
+	handle     types.Process
+	pid        int
+	name       string
+	regHandles tools.Slope
+}
+
+// handleMonitor tracks the rate of increase (slope) in running file handles
+type handleMonitor struct {
+	handles   []processWatcher
+	startTime time.Time
+}
+
+func (handleMon *handleMonitor) Init(ctx context.Context, t *testing.T, fixture *atesting.Fixture) {
+	// track running beats
+	// the `last 30s` metrics tend to report gauges, which we can't use for calculating a derivative.
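+	// (the slope fit needs raw counts sampled over time, not pre-averaged gauges)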
+ // so separately fetch the PIDs + pidInStatusMessageRegex := regexp.MustCompile(`[\d]+`) + status, err := fixture.ExecStatus(ctx) + if err != nil { + t.Logf("agent status returned an error: %v", err) + } + + for _, comp := range status.Components { + pidStr := pidInStatusMessageRegex.FindString(comp.Message) + pid, err := strconv.ParseInt(pidStr, 10, 64) + require.NoError(t, err) + + handle, err := sysinfo.Process(int(pid)) + require.NoError(t, err) + handlesReg := tools.NewSlope(comp.Name) + + t.Logf("created handle watcher for %s (%d)", comp.Name, pid) + handleMon.handles = append(handleMon.handles, processWatcher{handle: handle, pid: int(pid), name: comp.Name, regHandles: handlesReg}) + } + handleMon.startTime = time.Now() +} + +func (handleMon *handleMonitor) Update(t *testing.T, _ *atesting.Fixture) { + // for each running process, collect memory and handles + for _, handle := range handleMon.handles { + ohc, ok := handle.handle.(types.OpenHandleCounter) + if ok { + handleCount, err := ohc.OpenHandleCount() + require.NoError(t, err) + handle.regHandles.AddDatapoint(float64(handleCount), time.Since(handleMon.startTime)) + } + } +} + +func (handleMon *handleMonitor) GetSlopeHandlers() []tools.Slope { + slopes := []tools.Slope{} + for _, handle := range handleMon.handles { + slopes = append(slopes, handle.regHandles) + } + return slopes +} + +func (gm *handleMonitor) Name() string { + return "handles" +} diff --git a/x-pack/agentbeat/testing/integration/agent_long_test_apache.json b/x-pack/agentbeat/testing/integration/agent_long_test_apache.json new file mode 100644 index 000000000000..f1f3a6d27b29 --- /dev/null +++ b/x-pack/agentbeat/testing/integration/agent_long_test_apache.json @@ -0,0 +1,251 @@ +{ + "id": "5cca0416-2c8e-43bb-a12f-108088a2d19c", + "version": "WzY3NiwxXQ==", + "name": "apache-1", + "namespace": "", + "description": "", + "package": { + "name": "apache", + "title": "Apache HTTP Server", + "version": "1.17.0" + }, + "enabled": true, + "inputs": [ + { + "type": "logfile", + "policy_template": "apache", + "enabled": true, + "streams": [ + { + "enabled": true, + "data_stream": { + "type": "logs", + "dataset": "apache.access" + }, + "vars": { + "paths": { + "value": [ + "/var/log/apache2/access.log*", + "/var/log/apache2/other_vhosts_access.log*", + "/var/log/httpd/access_log*" + ], + "type": "text" + }, + "tags": { + "value": [ + "apache-access" + ], + "type": "text" + }, + "preserve_original_event": { + "value": false, + "type": "bool" + }, + "processors": { + "type": "yaml" + } + }, + "id": "logfile-apache.access-5cca0416-2c8e-43bb-a12f-108088a2d19c", + "compiled_stream": { + "paths": [ + "/var/log/apache2/access.log*", + "/var/log/apache2/other_vhosts_access.log*", + "/var/log/httpd/access_log*" + ], + "tags": [ + "apache-access" + ], + "exclude_files": [ + ".gz$" + ] + } + }, + { + "enabled": true, + "data_stream": { + "type": "logs", + "dataset": "apache.error" + }, + "vars": { + "paths": { + "value": [ + "/var/log/apache2/error.log*", + "/var/log/httpd/error_log*" + ], + "type": "text" + }, + "tags": { + "value": [ + "apache-error" + ], + "type": "text" + }, + "preserve_original_event": { + "value": false, + "type": "bool" + }, + "processors": { + "type": "yaml" + } + }, + "id": "logfile-apache.error-5cca0416-2c8e-43bb-a12f-108088a2d19c", + "compiled_stream": { + "paths": [ + "/var/log/apache2/error.log*", + "/var/log/httpd/error_log*" + ], + "exclude_files": [ + ".gz$" + ], + "tags": [ + "apache-error" + ], + "processors": [ + { + "add_locale": null + } + ] + } + 
} + ], + "vars": { + "condition": { + "type": "text" + } + } + }, + { + "type": "httpjson", + "policy_template": "apache", + "enabled": false, + "streams": [ + { + "enabled": false, + "data_stream": { + "type": "logs", + "dataset": "apache.access" + }, + "vars": { + "interval": { + "value": "10s", + "type": "text" + }, + "search": { + "value": "search sourcetype=\"access*\"", + "type": "text" + }, + "tags": { + "value": [ + "forwarded", + "apache-access" + ], + "type": "text" + }, + "preserve_original_event": { + "value": false, + "type": "bool" + }, + "processors": { + "type": "yaml" + }, + "enable_request_tracer": { + "type": "bool" + } + }, + "id": "httpjson-apache.access-5cca0416-2c8e-43bb-a12f-108088a2d19c" + }, + { + "enabled": false, + "data_stream": { + "type": "logs", + "dataset": "apache.error" + }, + "vars": { + "interval": { + "value": "10s", + "type": "text" + }, + "search": { + "value": "search sourcetype=apache:error OR sourcetype=apache_error", + "type": "text" + }, + "tags": { + "value": [ + "forwarded", + "apache-error" + ], + "type": "text" + }, + "preserve_original_event": { + "value": false, + "type": "bool" + }, + "processors": { + "type": "yaml" + }, + "enable_request_tracer": { + "type": "bool" + } + }, + "id": "httpjson-apache.error-5cca0416-2c8e-43bb-a12f-108088a2d19c" + } + ], + "vars": { + "url": { + "value": "https://server.example.com:8089", + "type": "text" + }, + "username": { + "type": "text" + }, + "password": { + "type": "password" + }, + "token": { + "type": "password" + } + } + }, + { + "type": "apache/metrics", + "policy_template": "apache", + "enabled": false, + "streams": [ + { + "enabled": false, + "data_stream": { + "type": "metrics", + "dataset": "apache.status" + }, + "vars": { + "period": { + "value": "30s", + "type": "text" + }, + "server_status_path": { + "value": "/server-status", + "type": "text" + } + }, + "id": "apache/metrics-apache.status-5cca0416-2c8e-43bb-a12f-108088a2d19c" + } + ], + "vars": { + "hosts": { + "value": [ + "http://127.0.0.1" + ], + "type": "text" + }, + "condition": { + "type": "text" + } + } + } + ], + "revision": 1, + "created_at": "2024-02-01T16:52:06.512Z", + "created_by": "system", + "updated_at": "2024-02-01T16:52:06.512Z", + "updated_by": "system" +} \ No newline at end of file diff --git a/x-pack/agentbeat/testing/integration/agent_long_test_base_system_integ.json b/x-pack/agentbeat/testing/integration/agent_long_test_base_system_integ.json new file mode 100644 index 000000000000..e231beb173b3 --- /dev/null +++ b/x-pack/agentbeat/testing/integration/agent_long_test_base_system_integ.json @@ -0,0 +1,788 @@ +{ + "id": "9bf446fc-58d4-4767-b42d-3450815d5d3d", + "version": "WzYzMSwxXQ==", + "name": "system-1", + "namespace": "default", + "package": { + "name": "system", + "title": "System", + "version": "1.53.0" + }, + "enabled": true, + "inputs": [ + { + "type": "logfile", + "policy_template": "system", + "enabled": true, + "streams": [ + { + "enabled": true, + "data_stream": { + "type": "logs", + "dataset": "system.auth" + }, + "vars": { + "ignore_older": { + "value": "72h", + "type": "text" + }, + "paths": { + "value": [ + "/var/log/auth.log*", + "/var/log/secure*" + ], + "type": "text" + }, + "preserve_original_event": { + "value": false, + "type": "bool" + }, + "tags": { + "value": [ + "system-auth" + ], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "logfile-system.auth-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "ignore_older": "72h", + "paths": [ + 
"/var/log/auth.log*", + "/var/log/secure*" + ], + "exclude_files": [ + ".gz$" + ], + "multiline": { + "pattern": "^\\s", + "match": "after" + }, + "tags": [ + "system-auth" + ], + "processors": [ + { + "add_locale": null + }, + { + "rename": { + "fields": [ + { + "from": "message", + "to": "event.original" + } + ], + "ignore_missing": true, + "fail_on_error": false + } + }, + { + "syslog": { + "field": "event.original", + "ignore_missing": true, + "ignore_failure": true + } + } + ] + } + }, + { + "enabled": true, + "data_stream": { + "type": "logs", + "dataset": "system.syslog" + }, + "vars": { + "paths": { + "value": [ + "/var/log/messages*", + "/var/log/syslog*", + "/var/log/system*" + ], + "type": "text" + }, + "preserve_original_event": { + "value": false, + "type": "bool" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + }, + "ignore_older": { + "value": "72h", + "type": "text" + }, + "exclude_files": { + "value": [ + "\\.gz$" + ], + "type": "text" + } + }, + "id": "logfile-system.syslog-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "paths": [ + "/var/log/messages*", + "/var/log/syslog*", + "/var/log/system*" + ], + "exclude_files": [ + "\\.gz$" + ], + "multiline": { + "pattern": "^\\s", + "match": "after" + }, + "processors": [ + { + "add_locale": null + } + ], + "tags": null, + "ignore_older": "72h" + } + } + ] + }, + { + "type": "winlog", + "policy_template": "system", + "enabled": true, + "streams": [ + { + "enabled": true, + "data_stream": { + "type": "logs", + "dataset": "system.application" + }, + "vars": { + "preserve_original_event": { + "value": false, + "type": "bool" + }, + "event_id": { + "type": "text" + }, + "ignore_older": { + "value": "72h", + "type": "text" + }, + "language": { + "value": 0, + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "winlog-system.application-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "name": "Application", + "condition": "${host.platform} == 'windows'", + "ignore_older": "72h" + } + }, + { + "enabled": true, + "data_stream": { + "type": "logs", + "dataset": "system.security" + }, + "vars": { + "preserve_original_event": { + "value": false, + "type": "bool" + }, + "event_id": { + "type": "text" + }, + "ignore_older": { + "value": "72h", + "type": "text" + }, + "language": { + "value": 0, + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "winlog-system.security-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "name": "Security", + "condition": "${host.platform} == 'windows'", + "ignore_older": "72h" + } + }, + { + "enabled": true, + "data_stream": { + "type": "logs", + "dataset": "system.system" + }, + "vars": { + "preserve_original_event": { + "value": false, + "type": "bool" + }, + "event_id": { + "type": "text" + }, + "ignore_older": { + "value": "72h", + "type": "text" + }, + "language": { + "value": 0, + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "winlog-system.system-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "name": "System", + "condition": "${host.platform} == 'windows'", + "ignore_older": "72h" + } + } + ] + }, + { + "type": "system/metrics", + "policy_template": "system", + "enabled": true, + "streams": [ + { + "enabled": false, + "data_stream": { + "type": "metrics", + "dataset": "system.core" + }, 
+ "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "core.metrics": { + "value": [ + "percentages" + ], + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.core-9bf446fc-58d4-4767-b42d-3450815d5d3d" + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.cpu" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "cpu.metrics": { + "value": [ + "percentages", + "normalized_percentages" + ], + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.cpu-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "cpu" + ], + "cpu.metrics": [ + "percentages", + "normalized_percentages" + ], + "period": "1s" + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.diskio" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "diskio.include_devices": { + "value": [], + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + } + }, + "id": "system/metrics-system.diskio-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "diskio" + ], + "diskio.include_devices": null, + "period": "1s" + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.filesystem" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "filesystem.ignore_types": { + "value": [], + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "value": "\"\"", + "type": "yaml" + } + }, + "id": "system/metrics-system.filesystem-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "filesystem" + ], + "period": "1s", + "processors": [ + { + "drop_event.when.regexp": { + "system.filesystem.mount_point": "^/(sys|cgroup|proc|dev|etc|host|lib|snap)($|/)" + } + } + ] + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.fsstat" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "value": "\"\"", + "type": "yaml" + } + }, + "id": "system/metrics-system.fsstat-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "fsstat" + ], + "period": "1s", + "processors": [ + { + "drop_event.when.regexp": { + "system.fsstat.mount_point": "^/(sys|cgroup|proc|dev|etc|host|lib|snap)($|/)" + } + } + ] + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.load" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.load-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "load" + ], + "condition": "${host.platform} != 'windows'", + "period": "1s" + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.memory" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.memory-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "memory" + ], + "period": "1s" + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + 
"dataset": "system.network" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "network.interfaces": { + "value": [], + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.network-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "network" + ], + "period": "1s", + "network.interfaces": null + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.process" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "process.include_top_n.by_cpu": { + "value": 5, + "type": "integer" + }, + "process.include_top_n.by_memory": { + "value": 5, + "type": "integer" + }, + "process.cmdline.cache.enabled": { + "value": true, + "type": "bool" + }, + "process.cgroups.enabled": { + "value": false, + "type": "bool" + }, + "process.env.whitelist": { + "value": [], + "type": "text" + }, + "process.include_cpu_ticks": { + "value": false, + "type": "bool" + }, + "processes": { + "value": [ + ".*" + ], + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.process-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "process" + ], + "period": "1s", + "process.include_top_n.by_cpu": 5, + "process.include_top_n.by_memory": 5, + "process.cmdline.cache.enabled": true, + "process.cgroups.enabled": false, + "process.include_cpu_ticks": false, + "processes": [ + ".*" + ] + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.process.summary" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.process.summary-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "process_summary" + ], + "period": "1s" + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.socket_summary" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.socket_summary-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "socket_summary" + ], + "period": "1s" + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.uptime" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.uptime-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "uptime" + ], + "period": "1s" + } + } + ], + "vars": { + "system.hostfs": { + "type": "text" + } + } + }, + { + "type": "httpjson", + "policy_template": "system", + "enabled": false, + "streams": [ + { + "enabled": false, + "data_stream": { + "type": "logs", + "dataset": "system.application" + }, + "vars": { + "interval": { + "value": "1s", + "type": "text" + }, + "search": { + "value": "search sourcetype=\"XmlWinEventLog:Application\"", + "type": "text" + }, + "tags": { + "value": [ + "forwarded" + ], + "type": "text" + } + }, + "id": "httpjson-system.application-9bf446fc-58d4-4767-b42d-3450815d5d3d" + }, + { + "enabled": false, + "data_stream": { + "type": "logs", + "dataset": "system.security" + 
}, + "vars": { + "interval": { + "value": "1s", + "type": "text" + }, + "search": { + "value": "search sourcetype=\"XmlWinEventLog:Security\"", + "type": "text" + }, + "tags": { + "value": [ + "forwarded" + ], + "type": "text" + } + }, + "id": "httpjson-system.security-9bf446fc-58d4-4767-b42d-3450815d5d3d" + }, + { + "enabled": false, + "data_stream": { + "type": "logs", + "dataset": "system.system" + }, + "vars": { + "interval": { + "value": "1s", + "type": "text" + }, + "search": { + "value": "search sourcetype=\"XmlWinEventLog:System\"", + "type": "text" + }, + "tags": { + "value": [ + "forwarded" + ], + "type": "text" + } + }, + "id": "httpjson-system.system-9bf446fc-58d4-4767-b42d-3450815d5d3d" + } + ], + "vars": { + "url": { + "value": "https://server.example.com:8089", + "type": "text" + }, + "enable_request_tracer": { + "type": "bool" + }, + "username": { + "type": "text" + }, + "password": { + "type": "password" + }, + "token": { + "type": "password" + }, + "preserve_original_event": { + "value": false, + "type": "bool" + } + } + } + ] + } \ No newline at end of file diff --git a/x-pack/agentbeat/testing/integration/apm_propagation_test.go b/x-pack/agentbeat/testing/integration/apm_propagation_test.go new file mode 100644 index 000000000000..6325f941259c --- /dev/null +++ b/x-pack/agentbeat/testing/integration/apm_propagation_test.go @@ -0,0 +1,240 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +//go:build integration + +package integration + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "testing" + "text/template" + "time" + + "github.com/stretchr/testify/require" + + "github.com/elastic/elastic-agent-libs/kibana" + "github.com/elastic/elastic-agent/pkg/testing/tools/testcontext" + "github.com/elastic/go-elasticsearch/v8" + + "github.com/elastic/elastic-agent/pkg/control/v2/client" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" +) + +const agentConfigTemplateString = ` +outputs: + default: + type: fake-output +inputs: + - id: fake-apm + type: fake-apm +agent.monitoring: + traces: true + apm: + hosts: + - {{ .host }} + environment: {{ .environment }} + secret_token: {{ .secret_token }} + global_labels: + test_name: TestAPMConfig + test_type: Agent integration test + tls: + skip_verify: true +` + +func TestAPMConfig(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Default, + Stack: &define.Stack{}, + }) + f, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + deadline := time.Now().Add(10 * time.Minute) + ctx, cancel := testcontext.WithDeadline(t, context.Background(), deadline) + defer cancel() + + err = f.Prepare(ctx, fakeComponent) + require.NoError(t, err) + + name := "fake-apm" + environment := info.Namespace + + agentConfig := generateAgentConfigForAPM(t, agentConfigTemplateString, info, environment) + t.Logf("Rendered agent config:\n%s", agentConfig) + + testAPMTraces := func(ctx context.Context) error { + state, err := f.Client().State(ctx) + require.NoError(t, err) + + t.Logf("agent state: %+v", state) + + // test that APM traces are being sent using initial configuration + require.Eventually(t, func() bool { + count, errCount := countAPMTraces(ctx, t, info.ESClient, name, 
environment) + if errCount != nil { + t.Logf("Error retrieving APM traces count for service %q and environment %q: %s", name, environment, errCount) + return false + } + return count > 0 + }, 1*time.Minute, time.Second) + + // change the configuration with a new environment and check that the update has been processed + environment = environment + "-changed" + modifiedAgentConfig := generateAgentConfigForAPM(t, agentConfigTemplateString, info, environment) + t.Logf("Rendered agent modified config:\n%s", modifiedAgentConfig) + err = f.Client().Configure(ctx, modifiedAgentConfig) + require.NoError(t, err, "error updating agent config with a new APM environment") + + // check that we receive traces with the new environment string + require.Eventually(t, func() bool { + count, errCount := countAPMTraces(ctx, t, info.ESClient, name, environment) + if errCount != nil { + t.Logf("Error retrieving APM traces count for service %q and environment %q: %s", name, environment, errCount) + return false + } + return count > 0 + }, 1*time.Minute, time.Second) + + return nil + } + + err = f.Run(ctx, atesting.State{ + Configure: agentConfig, + AgentState: atesting.NewClientState(client.Healthy), + Components: map[string]atesting.ComponentState{ + "fake-apm-default": { + State: atesting.NewClientState(client.Healthy), + Units: map[atesting.ComponentUnitKey]atesting.ComponentUnitState{ + atesting.ComponentUnitKey{UnitType: client.UnitTypeOutput, UnitID: "fake-apm-default"}: { + State: atesting.NewClientState(client.Healthy), + }, + atesting.ComponentUnitKey{UnitType: client.UnitTypeInput, UnitID: "fake-apm-default-fake-apm"}: { + State: atesting.NewClientState(client.Healthy), + }, + }, + }, + }, + After: testAPMTraces, + }) + + require.NoError(t, err) + +} + +func countAPMTraces(ctx context.Context, t *testing.T, esClient *elasticsearch.Client, serviceName, environment string) (int, error) { + queryRaw := map[string]interface{}{ + "query": map[string]interface{}{ + "bool": map[string]interface{}{ + "filter": []map[string]interface{}{ + { + "term": map[string]interface{}{ + "service.name": map[string]interface{}{ + "value": serviceName, + }, + }, + }, + { + "term": map[string]interface{}{ + "service.environment": map[string]interface{}{ + "value": environment, + }, + }, + }, + }, + }, + }, + } + + buf := new(bytes.Buffer) + err := json.NewEncoder(buf).Encode(queryRaw) + if err != nil { + return 0, fmt.Errorf("error encoding query: %w", err) + } + + count := esClient.Count + + response, err := count( + count.WithContext(ctx), + count.WithIndex("traces-apm-default"), + count.WithBody(buf), + ) + if err != nil { + return 0, fmt.Errorf("error executing query: %w", err) + } + + defer response.Body.Close() + + var body struct { + Count int + } + + // decoder := json.NewDecoder(response.Body) + // err = decoder.Decode(&body) + bodyBytes, _ := io.ReadAll(response.Body) + + t.Logf("received ES response: %s", bodyBytes) + err = json.Unmarshal(bodyBytes, &body) + + return body.Count, err +} + +// types to correctly parse the APM config we get from kibana API +type apmConfigResponse struct { + CloudStandaloneSetup CloudStandaloneSetup `json:"cloudStandaloneSetup,omitempty"` + IsFleetEnabled bool `json:"isFleetEnabled,omitempty"` + FleetAgents []FleetAgents `json:"fleetAgents,omitempty"` +} +type CloudStandaloneSetup struct { + ApmServerURL string `json:"apmServerUrl,omitempty"` + SecretToken string `json:"secretToken,omitempty"` +} +type FleetAgents struct { + ID string `json:"id,omitempty"` + Name string 
`json:"name,omitempty"` + ApmServerURL string `json:"apmServerUrl,omitempty"` + SecretToken string `json:"secretToken,omitempty"` +} + +func generateAgentConfigForAPM(t *testing.T, configTemplate string, info *define.Info, environment string) string { + t.Helper() + apmConfigData := getAPMConfigFromKibana(t, info.KibanaClient) + + configT, err := template.New("test config").Parse(configTemplate) + require.NoErrorf(t, err, "Error parsing agent config template\n%s", configTemplate) + + buf := new(strings.Builder) + templateData := map[string]any{ + "environment": environment, + "secret_token": apmConfigData.SecretToken, + "host": apmConfigData.ApmServerURL, + } + err = configT.Execute(buf, templateData) + require.NoErrorf(t, err, "Error rendering template\n%s\nwith data %v", configTemplate, templateData) + return buf.String() +} + +func getAPMConfigFromKibana(t *testing.T, kc *kibana.Client) CloudStandaloneSetup { + t.Helper() + response, err := kc.Send(http.MethodGet, "/internal/apm/fleet/agents", nil, nil, nil) + require.NoError(t, err, "Error getting APM connection params from kibana") + defer response.Body.Close() + + responseBytes, err := io.ReadAll(response.Body) + require.NoError(t, err, "Error reading data from http response") + apmConfig := new(apmConfigResponse) + err = json.Unmarshal(responseBytes, apmConfig) + require.NoError(t, err, "Error unmarshalling apm config") + require.NotEmpty(t, apmConfig.CloudStandaloneSetup.ApmServerURL, "APM config URL is empty") + require.NotEmpty(t, apmConfig.CloudStandaloneSetup.SecretToken, "APM config token is empty") + + return apmConfig.CloudStandaloneSetup +} diff --git a/x-pack/agentbeat/testing/integration/beats_serverless_test.go b/x-pack/agentbeat/testing/integration/beats_serverless_test.go new file mode 100644 index 000000000000..c8f875559cde --- /dev/null +++ b/x-pack/agentbeat/testing/integration/beats_serverless_test.go @@ -0,0 +1,631 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
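+
+// This suite drives a single beat binary (chosen via TEST_BINARY_NAME) against
+// a serverless Elasticsearch project. For orientation, the config rendered by
+// SetupSuite below looks roughly like the following once the template
+// variables are filled in (hosts, API key, and test id here are illustrative
+// placeholders, not real values):
+//
+//	output.elasticsearch:
+//	  hosts: ["https://es.example.com:443"]
+//	  api_key: "key-id:key-secret"
+//	setup.kibana:
+//	  host: https://kb.example.com:443
+//	processors:
+//	  - add_fields:
+//	      target: host
+//	      fields:
+//	        test-id: 00000000-0000-0000-0000-000000000000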
+
+//go:build integration
+
+package integration
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/url"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+	"text/template"
+	"time"
+
+	"github.com/gofrs/uuid/v5"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"github.com/stretchr/testify/suite"
+
+	"github.com/elastic/elastic-agent-libs/mapstr"
+	atesting "github.com/elastic/elastic-agent/pkg/testing"
+	"github.com/elastic/elastic-agent/pkg/testing/define"
+	"github.com/elastic/elastic-agent/pkg/testing/tools"
+	"github.com/elastic/elastic-agent/pkg/testing/tools/estools"
+)
+
+type BeatRunner struct {
+	suite.Suite
+	requirementsInfo *define.Info
+	agentFixture     *atesting.Fixture
+
+	// connection info
+	ESHost  string
+	user    string
+	pass    string
+	kibHost string
+
+	testUuid     string
+	testbeatName string
+
+	skipCleanup bool
+}
+
+func TestBeatsServerless(t *testing.T) {
+	info := define.Require(t, define.Requirements{
+		Group: Default,
+		OS: []define.OS{
+			{Type: define.Linux},
+		},
+		Stack: &define.Stack{},
+		Local: false,
+		Sudo:  true,
+	})
+
+	suite.Run(t, &BeatRunner{requirementsInfo: info})
+}
+
+func (runner *BeatRunner) SetupSuite() {
+	runner.skipCleanup = false
+
+	runner.testbeatName = os.Getenv("TEST_BINARY_NAME")
+	if runner.testbeatName == "" {
+		runner.T().Fatalf("TEST_BINARY_NAME must be set")
+	}
+	if runner.testbeatName == "elastic-agent" {
+		runner.T().Skipf("tests must be run against a beat, not elastic-agent")
+	}
+
+	if runner.testbeatName != "filebeat" && runner.testbeatName != "metricbeat" && runner.testbeatName != "auditbeat" && runner.testbeatName != "packetbeat" {
+		runner.T().Skip("test only supports filebeat, metricbeat, auditbeat, or packetbeat")
+	}
+	runner.T().Logf("running serverless tests with %s", runner.testbeatName)
+
+	agentFixture, err := define.NewFixtureWithBinary(runner.T(), define.Version(), runner.testbeatName, "/home/ubuntu", atesting.WithRunLength(time.Minute*3), atesting.WithAdditionalArgs([]string{"-E", "output.elasticsearch.allow_older_versions=true"}))
+	runner.agentFixture = agentFixture
+	require.NoError(runner.T(), err)
+
+	// the require.* checks below will fail without these, so assume the values are non-empty
+	runner.ESHost = os.Getenv("ELASTICSEARCH_HOST")
+	runner.user = os.Getenv("ELASTICSEARCH_USERNAME")
+	runner.pass = os.Getenv("ELASTICSEARCH_PASSWORD")
+	runner.kibHost = os.Getenv("KIBANA_HOST")
+
+	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
+	defer cancel()
+
+	beatOutConfig := `
+output.elasticsearch:
+  hosts: ["{{.es_host}}"]
+  api_key: "{{.key_user}}:{{.key_pass}}"
+setup.kibana:
+  host: {{.kb_host}}
+processors:
+  - add_fields:
+      target: host
+      fields:
+        test-id: {{.test_id}}
+{{.beat_cfg}}
+`
+
+	mbCfg := `
+metricbeat.config.modules:
+  path: ${path.config}/modules.d/*.yml
+`
+
+	fbCfg := `
+filebeat.modules:
+  - module: system
+    syslog:
+      enabled: true
+    auth:
+      enabled: true
+filebeat.config.modules:
+  - modules: system
+    syslog:
+      enabled: true
+    auth:
+      enabled: true
+`
+	auditbeatCfg := `
+auditbeat.modules:
+
+- module: file_integrity
+  paths:
+  - /bin
+  - /usr/bin
+  - /sbin
+  - /usr/sbin
+  - /etc
+`
+
+	packetbeatCfg := `
+`
+
+	tmpl, err := template.New("config").Parse(beatOutConfig)
+	require.NoError(runner.T(), err)
+
+	apiResp, err := estools.CreateAPIKey(ctx, runner.requirementsInfo.ESClient, estools.APIKeyRequest{Name: "test-api-key", Expiration: "1d"})
+	require.NoError(runner.T(), err)
+
+	// beats likes to add standard ports to URLs that 
don't have them, and ESS will sometimes return a URL without a port, assuming :443 + // so try to fix that here + fixedKibanaHost := runner.kibHost + parsedKibana, err := url.Parse(runner.kibHost) + require.NoError(runner.T(), err) + if parsedKibana.Port() == "" { + fixedKibanaHost = fmt.Sprintf("%s:443", fixedKibanaHost) + } + + fixedESHost := runner.ESHost + parsedES, err := url.Parse(runner.ESHost) + require.NoError(runner.T(), err) + if parsedES.Port() == "" { + fixedESHost = fmt.Sprintf("%s:443", fixedESHost) + } + + runner.T().Logf("configuring beats with %s / %s", fixedESHost, fixedKibanaHost) + + testUuid, err := uuid.NewV4() + require.NoError(runner.T(), err) + runner.testUuid = testUuid.String() + + additionalCfg := mbCfg + if runner.testbeatName == "filebeat" { + additionalCfg = fbCfg + } else if runner.testbeatName == "auditbeat" { + additionalCfg = auditbeatCfg + } else if runner.testbeatName == "packetbeat" { + additionalCfg = packetbeatCfg + } + + tmpl_map := map[string]string{"es_host": fixedESHost, "key_user": apiResp.Id, "key_pass": apiResp.APIKey, "kb_host": fixedKibanaHost, "test_id": testUuid.String(), "beat_cfg": additionalCfg} + parsedCfg := bytes.Buffer{} + err = tmpl.Execute(&parsedCfg, tmpl_map) + require.NoError(runner.T(), err) + + err = runner.agentFixture.WriteFileToWorkDir(ctx, parsedCfg.String(), fmt.Sprintf("%s.yml", runner.testbeatName)) + require.NoError(runner.T(), err) +} + +// run the beat with default metricsets, ensure no errors in logs + data is ingested +func (runner *BeatRunner) TestRunAndCheckData() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*4) + defer cancel() + + // in case there's already a running template, delete it, forcing the beat to re-install + runner.CleanupTemplates(ctx) + + err := runner.agentFixture.RunBeat(ctx) + require.NoError(runner.T(), err) + + docs, err := estools.GetLatestDocumentMatchingQuery(ctx, runner.requirementsInfo.ESClient, map[string]interface{}{ + "match": map[string]interface{}{ + "host.test-id": runner.testUuid, + }, + }, fmt.Sprintf("*%s*", runner.testbeatName)) + require.NoError(runner.T(), err) + require.NotEmpty(runner.T(), docs.Hits.Hits) +} + +// tests the [beat] setup --dashboards command +func (runner *BeatRunner) TestSetupDashboards() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*3) //dashboards seem to take a while + defer cancel() + + resp, err := runner.agentFixture.Exec(ctx, []string{"--path.home", runner.agentFixture.WorkDir(), "setup", "--dashboards"}) + assert.NoError(runner.T(), err) + runner.T().Logf("got response from dashboard setup: %s", string(resp)) + require.True(runner.T(), strings.Contains(string(resp), "Loaded dashboards")) + + dashList, err := tools.GetDashboards(ctx, runner.requirementsInfo.KibanaClient) + require.NoError(runner.T(), err) + + // interesting hack in cases where we don't have a clean environment + // check to see if any of the dashboards were created recently + found := false + for _, dash := range dashList { + if time.Since(dash.UpdatedAt) < time.Minute*5 { + found = true + break + } + } + require.True(runner.T(), found, fmt.Sprintf("could not find dashboard newer than 5 minutes, out of %d dashboards", len(dashList))) + + runner.Run("export dashboards", runner.SubtestExportDashboards) + // cleanup + if !runner.skipCleanup { + for _, dash := range dashList { + err = tools.DeleteDashboard(ctx, runner.requirementsInfo.KibanaClient, dash.ID) + if err != nil { + runner.T().Logf("WARNING: could not delete dashboards 
after test: %s", err) + break + } + } + } +} + +// tests the [beat] export dashboard command +func (runner *BeatRunner) SubtestExportDashboards() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*2) + defer cancel() + outDir := runner.T().TempDir() + + dashlist, err := tools.GetDashboards(ctx, runner.requirementsInfo.KibanaClient) + require.NoError(runner.T(), err) + require.NotEmpty(runner.T(), dashlist) + + exportOut, err := runner.agentFixture.Exec(ctx, []string{"--path.home", + runner.agentFixture.WorkDir(), + "export", + "dashboard", "--folder", outDir, "--id", dashlist[0].ID}) + + runner.T().Logf("got output: %s", exportOut) + assert.NoError(runner.T(), err) + + // The folder matches the major version of Kibana, so we read it from the API + dashboardFolder := fmt.Sprintf("/_meta/kibana/%d/dashboard", runner.requirementsInfo.KibanaClient.GetVersion().Major) + inFolder, err := os.ReadDir(filepath.Join(outDir, dashboardFolder)) + require.NoError(runner.T(), err) + runner.T().Logf("got log contents: %#v", inFolder) + require.NotEmpty(runner.T(), inFolder) +} + +// NOTE for the below tests: the testing framework doesn't guarantee a new stack instance each time, +// which means we might be running against a stack where a previous test has already done setup. +// perhaps CI should run `mage integration:clean` first? + +// tests the [beat] setup --pipelines command +func (runner *BeatRunner) TestSetupPipelines() { + if runner.testbeatName != "filebeat" { + runner.T().Skip("pipelines only available on filebeat") + } + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + + defer func() { + // cleanup + if !runner.skipCleanup { + err := estools.DeletePipelines(ctx, runner.requirementsInfo.ESClient, "*filebeat*") + if err != nil { + runner.T().Logf("WARNING: could not clean up pipelines: %s", err) + } + } + + }() + + // need to actually enable something that has pipelines + resp, err := runner.agentFixture.Exec(ctx, []string{"--path.home", runner.agentFixture.WorkDir(), + "setup", "--pipelines", "--modules", "apache", "-M", "apache.error.enabled=true", "-M", "apache.access.enabled=true"}) + assert.NoError(runner.T(), err) + + runner.T().Logf("got response from pipeline setup: %s", string(resp)) + + pipelines, err := estools.GetPipelines(ctx, runner.requirementsInfo.ESClient, "*filebeat*") + require.NoError(runner.T(), err) + require.NotEmpty(runner.T(), pipelines) + +} + +// test beat setup --index-management with ILM disabled +func (runner *BeatRunner) TestIndexManagementNoILM() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + defer func() { + runner.CleanupTemplates(ctx) + }() + + resp, err := runner.agentFixture.Exec(ctx, []string{"--path.home", + runner.agentFixture.WorkDir(), + "setup", + "--index-management", + "--E=setup.ilm.enabled=false"}) + runner.T().Logf("got response from management setup: %s", string(resp)) + assert.NoError(runner.T(), err) + // we should not print a warning if we've explicitly disabled ILM + assert.NotContains(runner.T(), string(resp), "not supported") + + tmpls, err := estools.GetIndexTemplatesForPattern(ctx, runner.requirementsInfo.ESClient, fmt.Sprintf("*%s*", runner.testbeatName)) + require.NoError(runner.T(), err) + for _, tmpl := range tmpls.IndexTemplates { + runner.T().Logf("got template: %s", tmpl.Name) + } + require.NotEmpty(runner.T(), tmpls.IndexTemplates) + + runner.Run("export templates", runner.SubtestExportTemplates) + runner.Run("export index 
patterns", runner.SubtestExportIndexPatterns) + +} + +// tests setup with all default settings +func (runner *BeatRunner) TestWithAllDefaults() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + defer func() { + runner.CleanupTemplates(ctx) + }() + + // pre-delete in case something else missed cleanup + runner.CleanupTemplates(ctx) + + resp, err := runner.agentFixture.Exec(ctx, []string{"--path.home", + runner.agentFixture.WorkDir(), + "setup", + "--index-management"}) + runner.T().Logf("got response from management setup: %s", string(resp)) + require.NoError(runner.T(), err) + + streams, err := estools.GetDataStreamsForPattern(ctx, runner.requirementsInfo.ESClient, fmt.Sprintf("%s*", runner.testbeatName)) + require.NoError(runner.T(), err) + + require.NotEmpty(runner.T(), streams.DataStreams) + +} + +// test the setup process with mismatching template and DSL names +func (runner *BeatRunner) TestCustomBadNames() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + + defer func() { + runner.CleanupTemplates(ctx) + }() + + resp, err := runner.agentFixture.Exec(ctx, []string{"-e", "--path.home", + runner.agentFixture.WorkDir(), + "setup", + "--index-management", + "--E=setup.dsl.enabled=true", "--E=setup.dsl.data_stream_pattern='custom-bad-name'", "--E=setup.template.name='custom-name'", "--E=setup.template.pattern='custom-name'"}) + runner.T().Logf("got response from management setup: %s", string(resp)) + require.NoError(runner.T(), err) + + require.True(runner.T(), strings.Contains(string(resp), "Additional updates & overwrites to this config will not work.")) + +} + +func (runner *BeatRunner) TestOverwriteWithCustomName() { + //an updated policy that has a different value than the default of 7d + updatedPolicy := mapstr.M{ + "data_retention": "1d", + } + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + defer func() { + runner.CleanupTemplates(ctx) + }() + + lctemp := runner.T().TempDir() + raw, err := json.MarshalIndent(updatedPolicy, "", " ") + require.NoError(runner.T(), err) + + lifecyclePath := filepath.Join(lctemp, "dsl_policy.json") + + err = os.WriteFile(lifecyclePath, raw, 0o744) + require.NoError(runner.T(), err) + + runner.CleanupTemplates(ctx) + + resp, err := runner.agentFixture.Exec(ctx, []string{"--path.home", + runner.agentFixture.WorkDir(), + "setup", + "--index-management", + "--E=setup.dsl.enabled=true", "--E=setup.dsl.data_stream_pattern='custom-name'", "--E=setup.template.name='custom-name'", "--E=setup.template.pattern='custom-name'"}) + runner.T().Logf("got response from management setup: %s", string(resp)) + require.NoError(runner.T(), err) + + runner.CheckDSLPolicy(ctx, "*custom-name*", "7d") + + resp, err = runner.agentFixture.Exec(ctx, []string{"--path.home", + runner.agentFixture.WorkDir(), + "setup", + "--index-management", + "--E=setup.dsl.enabled=true", "--E=setup.dsl.overwrite=true", "--E=setup.dsl.data_stream_pattern='custom-name'", + "--E=setup.template.name='custom-name'", "--E=setup.template.pattern='custom-name'", fmt.Sprintf("--E=setup.dsl.policy_file=%s", lifecyclePath)}) + runner.T().Logf("got response from management setup: %s", string(resp)) + require.NoError(runner.T(), err) + + runner.CheckDSLPolicy(ctx, "*custom-name*", "1d") + +} + +// TestWithCustomLifecyclePolicy uploads a custom DSL policy +func (runner *BeatRunner) TestWithCustomLifecyclePolicy() { + //create a custom policy file + dslPolicy := mapstr.M{ + 
"data_retention": "1d", + } + + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + defer func() { + runner.CleanupTemplates(ctx) + }() + + lctemp := runner.T().TempDir() + raw, err := json.MarshalIndent(dslPolicy, "", " ") + require.NoError(runner.T(), err) + + lifecyclePath := filepath.Join(lctemp, "dsl_policy.json") + + err = os.WriteFile(lifecyclePath, raw, 0o744) + require.NoError(runner.T(), err) + + runner.CleanupTemplates(ctx) + + resp, err := runner.agentFixture.Exec(ctx, []string{"--path.home", + runner.agentFixture.WorkDir(), + "setup", + "--index-management", + "--E=setup.dsl.enabled=true", fmt.Sprintf("--E=setup.dsl.policy_file=%s", lifecyclePath)}) + runner.T().Logf("got response from management setup: %s", string(resp)) + require.NoError(runner.T(), err) + + runner.CheckDSLPolicy(ctx, fmt.Sprintf("%s*", runner.testbeatName), "1d") + +} + +// tests beat setup --index-management with ILM explicitly set +// On serverless, this should fail. +func (runner *BeatRunner) TestIndexManagementILMEnabledFailure() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + info, err := estools.GetPing(ctx, runner.requirementsInfo.ESClient) + require.NoError(runner.T(), err) + + if info.Version.BuildFlavor != "serverless" { + runner.T().Skip("must run on serverless") + } + + resp, err := runner.agentFixture.Exec(ctx, []string{"--path.home", + runner.agentFixture.WorkDir(), + "setup", + "--index-management", + "--E=setup.ilm.enabled=true", "--E=setup.ilm.overwrite=true"}) + runner.T().Logf("got response from management setup: %s", string(resp)) + require.Error(runner.T(), err) + assert.Contains(runner.T(), string(resp), "error creating") +} + +// tests setup with both ILM and DSL enabled, should fail +func (runner *BeatRunner) TestBothLifecyclesEnabled() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + + resp, err := runner.agentFixture.Exec(ctx, []string{"--path.home", + runner.agentFixture.WorkDir(), + "setup", + "--index-management", + "--E=setup.ilm.enabled=true", "--E=setup.dsl.enabled=true"}) + runner.T().Logf("got response from management setup: %s", string(resp)) + require.Error(runner.T(), err) +} + +// disable all lifecycle management, ensure it's actually disabled +func (runner *BeatRunner) TestAllLifecyclesDisabled() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + defer func() { + runner.CleanupTemplates(ctx) + }() + + runner.CleanupTemplates(ctx) + + resp, err := runner.agentFixture.Exec(ctx, []string{"--path.home", + runner.agentFixture.WorkDir(), + "setup", + "--index-management", + "--E=setup.ilm.enabled=false", "--E=setup.dsl.enabled=false"}) + runner.T().Logf("got response from management setup: %s", string(resp)) + require.NoError(runner.T(), err) + + // make sure we have data streams, but there's no lifecycles + streams, err := estools.GetDataStreamsForPattern(ctx, runner.requirementsInfo.ESClient, fmt.Sprintf("*%s*", runner.testbeatName)) + require.NoError(runner.T(), err) + + require.NotEmpty(runner.T(), streams.DataStreams, "found no datastreams") + foundPolicy := false + for _, stream := range streams.DataStreams { + if stream.Lifecycle.DataRetention != "" { + foundPolicy = true + break + } + } + require.False(runner.T(), foundPolicy, "Found a lifecycle policy despite disabling lifecycles. 
Found: %#v", streams) +} + +// the export command doesn't actually make a network connection, +// so this won't fail +func (runner *BeatRunner) TestExport() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + info, err := estools.GetPing(ctx, runner.requirementsInfo.ESClient) + require.NoError(runner.T(), err) + + if info.Version.BuildFlavor != "serverless" { + runner.T().Skip("must run on serverless") + } + + resp, err := runner.agentFixture.Exec(ctx, []string{"--path.home", + runner.agentFixture.WorkDir(), + "export", "ilm-policy", "--E=setup.ilm.enabled=true"}) + runner.T().Logf("got response from export: %s", string(resp)) + assert.NoError(runner.T(), err) + // check to see if we got a valid output + policy := map[string]interface{}{} + err = json.Unmarshal(resp, &policy) + require.NoError(runner.T(), err) + + require.NotEmpty(runner.T(), policy["policy"]) +} + +// tests beat export with DSL +func (runner *BeatRunner) TestExportDSL() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + resp, err := runner.agentFixture.Exec(ctx, []string{"--path.home", + runner.agentFixture.WorkDir(), + "export", "ilm-policy", "--E=setup.dsl.enabled=true"}) + runner.T().Logf("got response from export: %s", string(resp)) + assert.NoError(runner.T(), err) + // check to see if we got a valid output + policy := map[string]interface{}{} + err = json.Unmarshal(resp, &policy) + require.NoError(runner.T(), err) + + require.NotEmpty(runner.T(), policy["data_retention"]) +} + +func (runner *BeatRunner) SubtestExportTemplates() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*2) + defer cancel() + outDir := runner.T().TempDir() + + _, err := runner.agentFixture.Exec(ctx, []string{"--path.home", + runner.agentFixture.WorkDir(), + "export", + "template", "--dir", outDir}) + assert.NoError(runner.T(), err) + + inFolder, err := os.ReadDir(filepath.Join(outDir, "/template")) + require.NoError(runner.T(), err) + runner.T().Logf("got log contents: %#v", inFolder) + require.NotEmpty(runner.T(), inFolder) +} + +func (runner *BeatRunner) SubtestExportIndexPatterns() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*2) + defer cancel() + + rawPattern, err := runner.agentFixture.Exec(ctx, []string{"--path.home", + runner.agentFixture.WorkDir(), + "export", + "index-pattern"}) + assert.NoError(runner.T(), err) + + idxPattern := map[string]interface{}{} + + err = json.Unmarshal(rawPattern, &idxPattern) + require.NoError(runner.T(), err) + require.NotNil(runner.T(), idxPattern["attributes"]) +} + +// CheckDSLPolicy checks if we have a match for the given DSL policy given a template name and policy data_retention +func (runner *BeatRunner) CheckDSLPolicy(ctx context.Context, tmpl string, policy string) { + streams, err := estools.GetDataStreamsForPattern(ctx, runner.requirementsInfo.ESClient, tmpl) + require.NoError(runner.T(), err) + + foundCustom := false + for _, stream := range streams.DataStreams { + if stream.Lifecycle.DataRetention == policy { + foundCustom = true + break + } + } + + require.True(runner.T(), foundCustom, "did not find our lifecycle policy. 
Found: %#v", streams)
+}
+
+// CleanupTemplates removes any existing index templates and data streams created by the tests
+func (runner *BeatRunner) CleanupTemplates(ctx context.Context) {
+	if !runner.skipCleanup {
+		_ = estools.DeleteIndexTemplatesDataStreams(ctx, runner.requirementsInfo.ESClient, fmt.Sprintf("%s*", runner.testbeatName))
+		_ = estools.DeleteIndexTemplatesDataStreams(ctx, runner.requirementsInfo.ESClient, "*custom-name*")
+	}
+}
diff --git a/x-pack/agentbeat/testing/integration/container_cmd_test.go b/x-pack/agentbeat/testing/integration/container_cmd_test.go
new file mode 100644
index 000000000000..3eb6669f578e
--- /dev/null
+++ b/x-pack/agentbeat/testing/integration/container_cmd_test.go
@@ -0,0 +1,491 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License 2.0;
+// you may not use this file except in compliance with the Elastic License 2.0.
+
+//go:build integration
+
+package integration
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/http/httputil"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"testing"
+	"text/template"
+	"time"
+
+	"github.com/gofrs/uuid/v5"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/elastic/elastic-agent-libs/kibana"
+	"github.com/elastic/elastic-agent/pkg/core/process"
+	atesting "github.com/elastic/elastic-agent/pkg/testing"
+	"github.com/elastic/elastic-agent/pkg/testing/define"
+	"github.com/elastic/elastic-agent/pkg/testing/tools/fleettools"
+)
+
+func createPolicy(
+	t *testing.T,
+	ctx context.Context,
+	agentFixture *atesting.Fixture,
+	info *define.Info,
+	policyName string,
+	dataOutputID string) (string, string) {
+
+	createPolicyReq := kibana.AgentPolicy{
+		Name:        policyName,
+		Namespace:   info.Namespace,
+		Description: "test policy for agent enrollment",
+		MonitoringEnabled: []kibana.MonitoringEnabledOption{
+			kibana.MonitoringEnabledLogs,
+			kibana.MonitoringEnabledMetrics,
+		},
+		AgentFeatures: []map[string]interface{}{
+			{
+				"name":    "test_enroll",
+				"enabled": true,
+			},
+		},
+	}
+
+	if dataOutputID != "" {
+		createPolicyReq.DataOutputID = dataOutputID
+	}
+
+	// Create policy
+	policy, err := info.KibanaClient.CreatePolicy(ctx, createPolicyReq)
+	if err != nil {
+		t.Fatalf("could not create Agent Policy: %s", err)
+	}
+
+	// Create enrollment API key
+	createEnrollmentAPIKeyReq := kibana.CreateEnrollmentAPIKeyRequest{
+		PolicyID: policy.ID,
+	}
+
+	t.Logf("Creating enrollment API key...")
+	enrollmentToken, err := info.KibanaClient.CreateEnrollmentAPIKey(ctx, createEnrollmentAPIKeyReq)
+	if err != nil {
+		t.Fatalf("unable to create enrollment API key: %s", err)
+	}
+
+	return policy.ID, enrollmentToken.APIKey
+}
+
+func prepareAgentCMD(
+	t *testing.T,
+	ctx context.Context,
+	agentFixture *atesting.Fixture,
+	args []string,
+	env []string) (*exec.Cmd, *strings.Builder) {
+
+	cmd, err := agentFixture.PrepareAgentCommand(ctx, args)
+	if err != nil {
+		t.Fatalf("could not prepare agent command: %s", err)
+	}
+
+	t.Cleanup(func() {
+		if cmd.Process != nil {
+			t.Log(">> cleaning up: killing the Elastic-Agent process")
+			if err := cmd.Process.Kill(); err != nil {
+				t.Fatalf("could not kill Elastic-Agent process: %s", err)
+			}
+
+			// Kill does not wait for the process to finish, so we wait here
+			state, err := cmd.Process.Wait()
+			if err != nil {
+				t.Errorf("Elastic-Agent exited with error after kill signal: %s", err)
+				t.Errorf("Elastic-Agent exited with 
status %d", state.ExitCode()) + out, err := cmd.CombinedOutput() + if err == nil { + t.Log(string(out)) + } + } + + return + } + t.Log(">> cleaning up: no process to kill") + }) + + agentOutput := strings.Builder{} + cmd.Stderr = &agentOutput + cmd.Stdout = &agentOutput + cmd.Env = append(os.Environ(), env...) + return cmd, &agentOutput +} + +func TestContainerCMD(t *testing.T) { + info := define.Require(t, define.Requirements{ + Stack: &define.Stack{}, + Local: false, + Sudo: true, + OS: []define.OS{ + {Type: define.Linux}, + }, + Group: "container", + }) + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) + defer cancel() + + agentFixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + // prepare must be called otherwise `agentFixture.WorkDir()` will be empty + // and it must be set so the `STATE_PATH` below gets a valid path. + err = agentFixture.Prepare(ctx) + require.NoError(t, err) + + fleetURL, err := fleettools.DefaultURL(ctx, info.KibanaClient) + if err != nil { + t.Fatalf("could not get Fleet URL: %s", err) + } + + _, enrollmentToken := createPolicy( + t, + ctx, + agentFixture, + info, + fmt.Sprintf("%s-%s", t.Name(), uuid.Must(uuid.NewV4()).String()), + "") + env := []string{ + "FLEET_ENROLL=1", + "FLEET_URL=" + fleetURL, + "FLEET_ENROLLMENT_TOKEN=" + enrollmentToken, + // As the agent isn't built for a container, it's upgradable, triggering + // the start of the upgrade watcher. If `STATE_PATH` isn't set, the + // upgrade watcher will commence from a different path within the + // container, distinct from the current execution path. + "STATE_PATH=" + agentFixture.WorkDir(), + } + + cmd, agentOutput := prepareAgentCMD(t, ctx, agentFixture, []string{"container"}, env) + t.Logf(">> running binary with: %v", cmd.Args) + if err := cmd.Start(); err != nil { + t.Fatalf("error running container cmd: %s", err) + } + + require.Eventuallyf(t, func() bool { + // This will return errors until it connects to the agent, + // they're mostly noise because until the agent starts running + // we will get connection errors. If the test fails + // the agent logs will be present in the error message + // which should help to explain why the agent was not + // healthy. + err = agentFixture.IsHealthy(ctx, withEnv(env)) + return err == nil + }, + 5*time.Minute, time.Second, + "Elastic-Agent did not report healthy. 
Agent status error: \"%v\", Agent logs\n%s", + err, agentOutput, + ) +} + +func TestContainerCMDWithAVeryLongStatePath(t *testing.T) { + info := define.Require(t, define.Requirements{ + Stack: &define.Stack{}, + Local: false, + Sudo: true, + OS: []define.OS{ + {Type: define.Linux}, + }, + Group: "container", + }) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + + fleetURL, err := fleettools.DefaultURL(ctx, info.KibanaClient) + if err != nil { + t.Fatalf("could not get Fleet URL: %s", err) + } + + testCases := map[string]struct { + statePath string + expectedStatePath string + expectedSocketPath string + expectError bool + }{ + "small path": { // Use the set path + statePath: filepath.Join(os.TempDir(), "foo", "bar"), + expectedStatePath: filepath.Join(os.TempDir(), "foo", "bar"), + expectedSocketPath: "/tmp/foo/bar/data/smp7BzlzcwgrLK4PUxpu7G1O5UwV4adr.sock", + }, + "no path set": { // Use the default path + statePath: "", + expectedStatePath: "/usr/share/elastic-agent/state", + expectedSocketPath: "/usr/share/elastic-agent/state/data/Td8I7R-Zby36_zF_IOd9QVNlFblNEro3.sock", + }, + "long path": { // Path too long to create a unix socket, it will use /tmp/elastic-agent + statePath: "/tmp/ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + expectedStatePath: "/tmp/ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + expectedSocketPath: "/tmp/elastic-agent/Xegnlbb8QDcqNLPzyf2l8PhVHjWvlQgZ.sock", + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + agentFixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + _, enrollmentToken := createPolicy( + t, + ctx, + agentFixture, + info, + fmt.Sprintf("test-policy-enroll-%s", uuid.Must(uuid.NewV4()).String()), + "") + + env := []string{ + "FLEET_ENROLL=1", + "FLEET_URL=" + fleetURL, + "FLEET_ENROLLMENT_TOKEN=" + enrollmentToken, + "STATE_PATH=" + tc.statePath, + } + + cmd, agentOutput := prepareAgentCMD(t, ctx, agentFixture, []string{"container"}, env) + t.Logf(">> running binary with: %v", cmd.Args) + if err := cmd.Start(); err != nil { + t.Fatalf("error running container cmd: %s", err) + } + + require.Eventuallyf(t, func() bool { + // This will return errors until it connects to the agent, + // they're mostly noise because until the agent starts running + // we will get connection errors. If the test fails + // the agent logs will be present in the error message + // which should help to explain why the agent was not + // healthy. + err = agentFixture.IsHealthy(ctx, withEnv(env)) + return err == nil + }, + 1*time.Minute, time.Second, + "Elastic-Agent did not report healthy. 
Agent status error: \"%v\", Agent logs\n%s",
+				err, agentOutput,
+			)
+
+			t.Cleanup(func() {
+				_ = os.RemoveAll(tc.expectedStatePath)
+			})
+
+			// Now that the Elastic-Agent is healthy, check that the control socket path
+			// is the expected one
+			if _, err := os.Stat(tc.expectedStatePath); err != nil {
+				t.Errorf("cannot stat expected state path ('%s'): %s", tc.expectedStatePath, err)
+			}
+			if _, err := os.Stat(tc.expectedSocketPath); err != nil {
+				t.Errorf("cannot stat expected socket path ('%s'): %s", tc.expectedSocketPath, err)
+			}
+			containerPaths := filepath.Join(tc.expectedStatePath, "container-paths.yml")
+			if _, err := os.Stat(containerPaths); err != nil {
+				t.Errorf("cannot stat expected container-paths.yml path ('%s'): %s", containerPaths, err)
+			}
+
+			if t.Failed() {
+				containerPathsContent, err := os.ReadFile(containerPaths)
+				if err != nil {
+					t.Fatalf("could not read container-paths.yml: %s", err)
+				}
+
+				t.Log("contents of 'container-paths.yml'")
+				t.Log(string(containerPathsContent))
+			}
+		})
+	}
+}
+
+func withEnv(env []string) process.CmdOption {
+	return func(c *exec.Cmd) error {
+		c.Env = append(os.Environ(), env...)
+		return nil
+	}
+}
+
+func TestContainerCMDEventToStderr(t *testing.T) {
+	info := define.Require(t, define.Requirements{
+		Stack: &define.Stack{},
+		Local: false,
+		Sudo:  true,
+		OS: []define.OS{
+			{Type: define.Linux},
+		},
+		Group: "container",
+	})
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
+	defer cancel()
+
+	agentFixture, err := define.NewFixtureFromLocalBuild(t, define.Version())
+	require.NoError(t, err)
+
+	// We call agentFixture.Prepare to set the workdir
+	require.NoError(t, agentFixture.Prepare(ctx), "failed preparing agent fixture")
+
+	_, outputID := createMockESOutput(t, info)
+	policyID, enrollmentAPIKey := createPolicy(
+		t,
+		ctx,
+		agentFixture,
+		info,
+		fmt.Sprintf("%s-%s", t.Name(), uuid.Must(uuid.NewV4()).String()),
+		outputID)
+
+	fleetURL, err := fleettools.DefaultURL(ctx, info.KibanaClient)
+	if err != nil {
+		t.Fatalf("could not get Fleet URL: %s", err)
+	}
+
+	env := []string{
+		"FLEET_ENROLL=1",
+		"FLEET_URL=" + fleetURL,
+		"FLEET_ENROLLMENT_TOKEN=" + enrollmentAPIKey,
+		"STATE_PATH=" + agentFixture.WorkDir(),
+		// That is what we're interested in testing
+		"EVENTS_TO_STDERR=true",
+	}
+
+	cmd, agentOutput := prepareAgentCMD(t, ctx, agentFixture, []string{"container"}, env)
+	addLogIntegration(t, info, policyID, "/tmp/flog.log")
+	generateLogFile(t, "/tmp/flog.log", time.Second/2, 100)
+
+	t.Logf(">> running binary with: %v", cmd.Args)
+	if err := cmd.Start(); err != nil {
+		t.Fatalf("error running container cmd: %s", err)
+	}
+
+	assert.Eventuallyf(t, func() bool {
+		// This will return errors until it connects to the agent,
+		// they're mostly noise because until the agent starts running
+		// we will get connection errors. If the test fails
+		// the agent logs will be present in the error message
+		// which should help to explain why the agent was not
+		// healthy.
+		err := agentFixture.IsHealthy(ctx, withEnv(env))
+		return err == nil
+	},
+		2*time.Minute, time.Second,
+		"Elastic-Agent did not report healthy. 
Agent status error: \"%v\", Agent logs\n%s",
+		err, agentOutput,
+	)
+
+	assert.Eventually(t, func() bool {
+		agentOutputStr := agentOutput.String()
+		scanner := bufio.NewScanner(strings.NewReader(agentOutputStr))
+		for scanner.Scan() {
+			if strings.Contains(scanner.Text(), "Cannot index event") {
+				return true
+			}
+		}
+
+		return false
+	}, 3*time.Minute, 10*time.Second, "cannot find events on stderr")
+}
+
+func createMockESOutput(t *testing.T, info *define.Info) (string, string) {
+	mockesURL := startMockES(t)
+	createOutputBody := `
+{
+  "id": "mock-es-%[1]s",
+  "name": "mock-es-%[1]s",
+  "type": "elasticsearch",
+  "is_default": false,
+  "hosts": [
+    "%s"
+  ],
+  "preset": "latency"
+}
+`
+	// The API will return an error if the output ID/name contains a
+	// UUID substring, so we replace the '-' by '_' to keep the API happy.
+	outputUUID := strings.Replace(uuid.Must(uuid.NewV4()).String(), "-", "_", -1)
+	bodyStr := fmt.Sprintf(createOutputBody, outputUUID, mockesURL)
+	bodyReader := strings.NewReader(bodyStr)
+	status, result, err := info.KibanaClient.Request(http.MethodPost, "/api/fleet/outputs", nil, nil, bodyReader)
+	if err != nil {
+		t.Fatalf("could not execute request to create output: %#v, status: %d, result:\n%s\nBody:\n%s", err, status, string(result), bodyStr)
+	}
+	if status != http.StatusOK {
+		t.Fatalf("creating output failed. Status code %d, response:\n%s", status, string(result))
+	}
+
+	outputResp := struct {
+		Item struct {
+			ID                  string   `json:"id"`
+			Name                string   `json:"name"`
+			Type                string   `json:"type"`
+			IsDefault           bool     `json:"is_default"`
+			Hosts               []string `json:"hosts"`
+			Preset              string   `json:"preset"`
+			IsDefaultMonitoring bool     `json:"is_default_monitoring"`
+		} `json:"item"`
+	}{}
+
+	if err := json.Unmarshal(result, &outputResp); err != nil {
+		t.Errorf("could not decode create output response: %s", err)
+		t.Logf("Response:\n%s", string(result))
+	}
+
+	return mockesURL, outputResp.Item.ID
+}
+
+func addLogIntegration(t *testing.T, info *define.Info, policyID, logFilePath string) {
+	agentPolicyBuilder := strings.Builder{}
+	tmpl, err := template.New(t.Name() + "custom-log-policy").Parse(policyJSON)
+	if err != nil {
+		t.Fatalf("cannot parse template: %s", err)
+	}
+
+	err = tmpl.Execute(&agentPolicyBuilder, policyVars{
+		Name:        "Log-Input-" + t.Name() + "-" + time.Now().Format(time.RFC3339),
+		PolicyID:    policyID,
+		LogFilePath: logFilePath,
+		Dataset:     "logs",
+		Namespace:   "default",
+	})
+	if err != nil {
+		t.Fatalf("could not render template: %s", err)
+	}
+	// We keep a copy of the policy for debugging purposes
+	agentPolicy := agentPolicyBuilder.String()
+
+	// Call Kibana to create the policy.
+	// Docs: https://www.elastic.co/guide/en/fleet/current/fleet-api-docs.html#create-integration-policy-api
+	resp, err := info.KibanaClient.Connection.Send(
+		http.MethodPost,
+		"/api/fleet/package_policies",
+		nil,
+		nil,
+		bytes.NewBufferString(agentPolicy))
+	if err != nil {
+		t.Fatalf("could not execute request to Kibana/Fleet: %s", err)
+	}
+	if resp.StatusCode != http.StatusOK {
+		// On error dump the whole request response so we can easily spot
+		// what went wrong.
+		t.Errorf("received a non 200-OK when adding package to policy. 
"+ + "Status code: %d", resp.StatusCode) + respDump, err := httputil.DumpResponse(resp, true) + if err != nil { + t.Fatalf("could not dump error response from Kibana: %s", err) + } + // Make debugging as easy as possible + t.Log("================================================================================") + t.Log("Kibana error response:") + t.Log(string(respDump)) + t.Log("================================================================================") + t.Log("Rendered policy:") + t.Log(agentPolicy) + t.Log("================================================================================") + t.FailNow() + } +} diff --git a/x-pack/agentbeat/testing/integration/delay_enroll_test.go b/x-pack/agentbeat/testing/integration/delay_enroll_test.go new file mode 100644 index 000000000000..9c66aac37034 --- /dev/null +++ b/x-pack/agentbeat/testing/integration/delay_enroll_test.go @@ -0,0 +1,145 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +//go:build integration + +package integration + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/gofrs/uuid/v5" + "github.com/stretchr/testify/require" + + "github.com/elastic/elastic-agent-libs/kibana" + "github.com/elastic/elastic-agent/internal/pkg/agent/install" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools" + "github.com/elastic/elastic-agent/pkg/testing/tools/check" + "github.com/elastic/elastic-agent/pkg/testing/tools/testcontext" +) + +func TestDelayEnroll(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Fleet, + Stack: &define.Stack{}, + Local: false, + Sudo: true, + }) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + agentFixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + // 1. Create a policy in Fleet with monitoring enabled. + // To ensure there are no conflicts with previous test runs against + // the same ESS stack, we add the current time at the end of the policy + // name. This policy does not contain any integration. + t.Log("Enrolling agent in Fleet with a test policy") + createPolicyReq := kibana.AgentPolicy{ + Name: fmt.Sprintf("test-policy-enroll-%s", uuid.Must(uuid.NewV4()).String()), + Namespace: info.Namespace, + Description: "test policy for agent enrollment", + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + AgentFeatures: []map[string]interface{}{ + { + "name": "test_enroll", + "enabled": true, + }, + }, + } + + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Force: true, + DelayEnroll: true, + Privileged: false, + } + // Install the Elastic-Agent with the policy that was just + // created. 
+ _, err = tools.InstallAgentWithPolicy( + ctx, + t, + installOpts, + agentFixture, + info.KibanaClient, + createPolicyReq) + require.NoError(t, err) + + // Start elastic-agent via service, this should do the enrollment + err = install.StartService("") // topPath can be blank as this is only starting the service + require.NoErrorf(t, err, "failed to start service") + + // check to make sure enroll worked + check.ConnectedToFleet(ctx, t, agentFixture, 5*time.Minute) +} + +func TestDelayEnrollUnprivileged(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Fleet, + Stack: &define.Stack{}, + Local: false, + Sudo: true, + }) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + agentFixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + // 1. Create a policy in Fleet with monitoring enabled. + // To ensure there are no conflicts with previous test runs against + // the same ESS stack, we add a UUID at the end of the policy + // name. This policy does not contain any integration. + t.Log("Enrolling agent in Fleet with a test policy") + createPolicyReq := kibana.AgentPolicy{ + Name: fmt.Sprintf("test-policy-enroll-%s", uuid.Must(uuid.NewV4()).String()), + Namespace: info.Namespace, + Description: "test policy for agent enrollment", + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + AgentFeatures: []map[string]interface{}{ + { + "name": "test_enroll", + "enabled": true, + }, + }, + } + + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Force: true, + DelayEnroll: true, + Privileged: false, + } + // Install the Elastic-Agent with the policy that was just + // created. + _, err = tools.InstallAgentWithPolicy( + ctx, + t, + installOpts, + agentFixture, + info.KibanaClient, + createPolicyReq) + require.NoError(t, err) + + // Start elastic-agent via service, this should do the enrollment + err = install.StartService("") // topPath can be blank as this is only starting the service + require.NoErrorf(t, err, "failed to start service") + + // check to make sure enroll worked + check.ConnectedToFleet(ctx, t, agentFixture, 5*time.Minute) +} diff --git a/x-pack/agentbeat/testing/integration/diagnostics_test.go b/x-pack/agentbeat/testing/integration/diagnostics_test.go new file mode 100644 index 000000000000..041932104970 --- /dev/null +++ b/x-pack/agentbeat/testing/integration/diagnostics_test.go @@ -0,0 +1,379 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
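+
+// The tests below run the agent's diagnostics command, either with `-p`
+// (which should add the optional cpu.pprof to the archive) or as
+// `diagnostics collect`, and then verify the resulting
+// elastic-agent-diagnostics-*.zip archive. A minimal sketch of this kind of
+// archive check, assuming a zip path and a list of expected file names (the
+// helper below is illustrative, not part of this file):
+//
+//	func archiveContains(t *testing.T, zipPath string, want []string) {
+//		r, err := zip.OpenReader(zipPath)
+//		require.NoError(t, err)
+//		defer r.Close()
+//		found := map[string]bool{}
+//		for _, f := range r.File {
+//			found[path.Base(f.Name)] = true
+//		}
+//		for _, name := range want {
+//			assert.Truef(t, found[name], "file %q missing from diagnostics archive", name)
+//		}
+//	}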
+
+//go:build integration
+
+package integration
+
+import (
+	"archive/zip"
+	"context"
+	"io"
+	"io/fs"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/elastic/elastic-agent/pkg/control/v2/client"
+	integrationtest "github.com/elastic/elastic-agent/pkg/testing"
+	"github.com/elastic/elastic-agent/pkg/testing/define"
+	"github.com/elastic/elastic-agent/pkg/testing/tools/testcontext"
+)
+
+const diagnosticsArchiveGlobPattern = "elastic-agent-diagnostics-*.zip"
+
+var diagnosticsFiles = []string{
+	"package.version",
+	"agent-info.yaml",
+	"allocs.pprof.gz",
+	"block.pprof.gz",
+	"components-actual.yaml",
+	"components-expected.yaml",
+	"computed-config.yaml",
+	"goroutine.pprof.gz",
+	"heap.pprof.gz",
+	"local-config.yaml",
+	"mutex.pprof.gz",
+	"pre-config.yaml",
+	"state.yaml",
+	"threadcreate.pprof.gz",
+	"variables.yaml",
+	"version.txt",
+}
+
+var compDiagnosticsFiles = []string{
+	"allocs.pprof.gz",
+	"block.pprof.gz",
+	"goroutine.pprof.gz",
+	"heap.pprof.gz",
+	"mutex.pprof.gz",
+	"threadcreate.pprof.gz",
+}
+
+var componentSetup = map[string]integrationtest.ComponentState{
+	"fake-default": {
+		State: integrationtest.NewClientState(client.Healthy),
+		Units: map[integrationtest.ComponentUnitKey]integrationtest.ComponentUnitState{
+			integrationtest.ComponentUnitKey{UnitType: client.UnitTypeOutput, UnitID: "fake-default"}: {
+				State: integrationtest.NewClientState(client.Healthy),
+			},
+			integrationtest.ComponentUnitKey{UnitType: client.UnitTypeInput, UnitID: "fake-default-fake"}: {
+				State: integrationtest.NewClientState(client.Healthy),
+			},
+		},
+	},
+}
+
+var isolatedUnitsComponentSetup = map[string]integrationtest.ComponentState{
+	"fake-isolated-units-default-fake-isolated-units": {
+		State: integrationtest.NewClientState(client.Healthy),
+		Units: map[integrationtest.ComponentUnitKey]integrationtest.ComponentUnitState{
+			integrationtest.ComponentUnitKey{UnitType: client.UnitTypeOutput, UnitID: "fake-isolated-units-default-fake-isolated-units"}: {
+				State: integrationtest.NewClientState(client.Healthy),
+			},
+			integrationtest.ComponentUnitKey{UnitType: client.UnitTypeInput, UnitID: "fake-isolated-units-default-fake-isolated-units-unit"}: {
+				State: integrationtest.NewClientState(client.Healthy),
+			},
+		},
+	},
+	"fake-isolated-units-default-fake-isolated-units-1": {
+		State: integrationtest.NewClientState(client.Healthy),
+		Units: map[integrationtest.ComponentUnitKey]integrationtest.ComponentUnitState{
+			integrationtest.ComponentUnitKey{UnitType: client.UnitTypeOutput, UnitID: "fake-isolated-units-default-fake-isolated-units-1"}: {
+				State: integrationtest.NewClientState(client.Healthy),
+			},
+			integrationtest.ComponentUnitKey{UnitType: client.UnitTypeInput, UnitID: "fake-isolated-units-default-fake-isolated-units-1-unit"}: {
+				State: integrationtest.NewClientState(client.Healthy),
+			},
+		},
+	},
+}
+
+type componentAndUnitNames struct {
+	name      string
+	unitNames []string
+}
+
+func TestDiagnosticsOptionalValues(t *testing.T) {
+	define.Require(t, define.Requirements{
+		Group: Default,
+		Local: false,
+	})
+
+	fixture, err := define.NewFixtureFromLocalBuild(t, define.Version())
+	require.NoError(t, err)
+
+	ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute))
+	defer cancel()
+	err = fixture.Prepare(ctx, fakeComponent)
+	require.NoError(t, err)
+
+	diagpprof := 
append(diagnosticsFiles, "cpu.pprof") + diagCompPprof := append(compDiagnosticsFiles, "cpu.pprof") + + err = fixture.Run(ctx, integrationtest.State{ + Configure: simpleConfig2, + AgentState: integrationtest.NewClientState(client.Healthy), + Components: componentSetup, + After: testDiagnosticsFactory(t, componentSetup, diagpprof, diagCompPprof, fixture, []string{"diagnostics", "-p"}), + }) + require.NoError(t, err) +} + +func TestIsolatedUnitsDiagnosticsOptionalValues(t *testing.T) { + define.Require(t, define.Requirements{ + Group: Default, + Local: false, + }) + + fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + err = fixture.Prepare(ctx, fakeComponent) + require.NoError(t, err) + + diagpprof := append(diagnosticsFiles, "cpu.pprof") + diagCompPprof := append(compDiagnosticsFiles, "cpu.pprof") + + err = fixture.Run(ctx, integrationtest.State{ + Configure: complexIsolatedUnitsConfig, + AgentState: integrationtest.NewClientState(client.Healthy), + Components: isolatedUnitsComponentSetup, + After: testDiagnosticsFactory(t, isolatedUnitsComponentSetup, diagpprof, diagCompPprof, fixture, []string{"diagnostics", "-p"}), + }) + require.NoError(t, err) +} + +func TestDiagnosticsCommand(t *testing.T) { + define.Require(t, define.Requirements{ + Group: Default, + Local: false, + }) + + f, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + err = f.Prepare(ctx, fakeComponent) + require.NoError(t, err) + + err = f.Run(ctx, integrationtest.State{ + Configure: simpleConfig2, + AgentState: integrationtest.NewClientState(client.Healthy), + Components: componentSetup, + After: testDiagnosticsFactory(t, componentSetup, diagnosticsFiles, compDiagnosticsFiles, f, []string{"diagnostics", "collect"}), + }) + assert.NoError(t, err) +} + +func TestIsolatedUnitsDiagnosticsCommand(t *testing.T) { + define.Require(t, define.Requirements{ + Group: Default, + Local: false, + }) + + f, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + err = f.Prepare(ctx, fakeComponent) + require.NoError(t, err) + + err = f.Run(ctx, integrationtest.State{ + Configure: complexIsolatedUnitsConfig, + AgentState: integrationtest.NewClientState(client.Healthy), + Components: isolatedUnitsComponentSetup, + After: testDiagnosticsFactory(t, isolatedUnitsComponentSetup, diagnosticsFiles, compDiagnosticsFiles, f, []string{"diagnostics", "collect"}), + }) + assert.NoError(t, err) +} + +func testDiagnosticsFactory(t *testing.T, compSetup map[string]integrationtest.ComponentState, diagFiles []string, diagCompFiles []string, fix *integrationtest.Fixture, cmd []string) func(ctx context.Context) error { + return func(ctx context.Context) error { + diagZip, err := fix.ExecDiagnostics(ctx, cmd...) 
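+		// Note: without this check a failed diagnostics collection would go
+		// unnoticed, since err is reassigned by the call below.
+		require.NoError(t, err, "failed to collect diagnostics")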
+ + // get the version of the running agent + avi, err := getRunningAgentVersion(ctx, fix) + require.NoError(t, err) + + verifyDiagnosticArchive(t, compSetup, diagZip, diagFiles, diagCompFiles, avi) + + return nil + } +} + +func verifyDiagnosticArchive(t *testing.T, compSetup map[string]integrationtest.ComponentState, diagArchive string, diagFiles []string, diagCompFiles []string, avi *client.Version) { + // check that the archive is not an empty file + stat, err := os.Stat(diagArchive) + require.NoErrorf(t, err, "stat file %q failed", diagArchive) + require.Greaterf(t, stat.Size(), int64(0), "file %s has incorrect size", diagArchive) + + // extract the zip file into a temp folder + extractionDir := t.TempDir() + + extractZipArchive(t, diagArchive, extractionDir) + + compAndUnitNames := extractComponentAndUnitNames(compSetup) + expectedDiagArchiveFilePatterns := compileExpectedDiagnosticFilePatterns(avi, diagFiles, diagCompFiles, compAndUnitNames) + + expectedExtractedFiles := map[string]struct{}{} + for _, filePattern := range expectedDiagArchiveFilePatterns { + absFilePattern := filepath.Join(extractionDir, filePattern.pattern) + files, err := filepath.Glob(absFilePattern) + assert.NoErrorf(t, err, "error globbing with pattern %q", absFilePattern) + min := 0 + if filePattern.optional { + min = -1 + } + assert.Greaterf(t, len(files), min, "glob pattern %q matched no files", absFilePattern) + for _, f := range files { + expectedExtractedFiles[f] = struct{}{} + } + } + + actualExtractedDiagFiles := map[string]struct{}{} + + err = filepath.WalkDir(extractionDir, func(path string, entry fs.DirEntry, err error) error { + require.NoErrorf(t, err, "error walking extracted path %q", path) + + // we are not interested in directories + if !entry.IsDir() { + actualExtractedDiagFiles[path] = struct{}{} + info, err := entry.Info() + require.NoError(t, err, path) + assert.Greaterf(t, info.Size(), int64(0), "file %q has an invalid size", path) + } + + return nil + }) + require.NoErrorf(t, err, "error walking output directory %q", extractionDir) + + assert.ElementsMatch(t, extractKeysFromMap(expectedExtractedFiles), extractKeysFromMap(actualExtractedDiagFiles)) +} + +func extractComponentAndUnitNames(compSetup map[string]integrationtest.ComponentState) []componentAndUnitNames { + comps := make([]componentAndUnitNames, 0, len(compSetup)) + for compName, compState := range compSetup { + unitNames := make([]string, 0, len(compState.Units)) + for unitKey := range compState.Units { + unitNames = append(unitNames, unitKey.UnitID) + } + comps = append(comps, componentAndUnitNames{ + name: compName, + unitNames: unitNames, + }) + } + return comps +} + +func extractZipArchive(t *testing.T, zipFile string, dst string) { + t.Helper() + + zReader, err := zip.OpenReader(zipFile) + require.NoErrorf(t, err, "file %q is not a valid zip archive", zipFile) + defer zReader.Close() + + t.Logf("extracting diagnostic archive in dir %q", dst) + for _, zf := range zReader.File { + filePath := filepath.Join(dst, zf.Name) + t.Logf("unzipping file %q", filePath) + require.Truef(t, strings.HasPrefix(filePath, filepath.Clean(dst)+string(os.PathSeparator)), "file %q points outside of extraction dir %q", filePath, dst) + + if zf.FileInfo().IsDir() { + t.Logf("creating directory %q", filePath) + err := os.MkdirAll(filePath, os.ModePerm) + assert.NoError(t, err) + continue + } + + err = os.MkdirAll(filepath.Dir(filePath), os.ModePerm) + require.NoErrorf(t, err, "error creating parent folder for file %q", filePath) + + 
extractSingleFileFromArchive(t, zf, filePath) + + } +} + +func extractSingleFileFromArchive(t *testing.T, src *zip.File, dst string) { + dstFile, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, src.Mode()) + require.NoErrorf(t, err, "error creating extracted file %q", dst) + + defer dstFile.Close() + + srcFile, err := src.Open() + require.NoErrorf(t, err, "error opening zipped file %q", src.Name) + + defer srcFile.Close() + + _, err = io.Copy(dstFile, srcFile) + require.NoErrorf(t, err, "error copying content from zipped file %q to extracted file %q", src.Name, dst) +} + +func getRunningAgentVersion(ctx context.Context, f *integrationtest.Fixture) (*client.Version, error) { + avi, err := f.Client().Version(ctx) + if err != nil { + return nil, err + } + + return &avi, err +} + +func compileExpectedDiagnosticFilePatterns(avi *client.Version, diagfiles []string, diagCompFiles []string, comps []componentAndUnitNames) []filePattern { + files := make([]filePattern, 0, len(diagnosticsFiles)+len(comps)*len(compDiagnosticsFiles)) + + for _, file := range diagfiles { + files = append(files, filePattern{ + pattern: file, + optional: false, + }) + } + + for _, comp := range comps { + compPath := path.Join("components", comp.name) + for _, fileName := range diagCompFiles { + files = append(files, + filePattern{ + pattern: path.Join(compPath, fileName), + optional: false, + }) + } + } + + files = append(files, filePattern{ + pattern: path.Join("logs", "elastic-agent-"+avi.Commit[:6], "elastic-agent-*.ndjson"), + optional: false, + }) + // this pattern overlaps with the previous one but filepath.Glob() does not seem to match using '?' wildcard + // optional: it doesn't have to be there (in some cases the watcher has not written any logs) + files = append(files, filePattern{ + pattern: path.Join("logs", "elastic-agent-"+avi.Commit[:6], "elastic-agent-watcher-*.ndjson"), + optional: true, + }) + + return files +} + +func extractKeysFromMap[K comparable, V any](src map[K]V) []K { + keys := make([]K, 0, len(src)) + for k := range src { + keys = append(keys, k) + } + return keys +} + +type filePattern struct { + pattern string + optional bool +} diff --git a/x-pack/agentbeat/testing/integration/endpoint_security_package.json.tmpl b/x-pack/agentbeat/testing/integration/endpoint_security_package.json.tmpl new file mode 100644 index 000000000000..0bb32d58f501 --- /dev/null +++ b/x-pack/agentbeat/testing/integration/endpoint_security_package.json.tmpl @@ -0,0 +1,316 @@ +{ + "id": "{{.ID}}", + "name": "{{.Name}}", + "namespace": "default", + "package": { + "name": "endpoint", + "version": "{{.Version}}" + }, + "policy_id": "{{.PolicyID}}", + "vars": {}, + "inputs": [ + { + "type": "endpoint", + "enabled": true, + "streams": [], + "config": { + "integration_config": { + "value": { + "type": "endpoint", + "endpointConfig": { + "preset": "EDRComplete" + } + } + }, + "artifact_manifest": { + "value": { + "manifest_version": "1.0.0", + "schema_version": "v1", + "artifacts": { + "endpoint-exceptionlist-macos-v1": { + "encryption_algorithm": "none", + "decoded_sha256": "d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "decoded_size": 14, + "encoded_sha256": "f8e6afa1d5662f5b37f83337af774b5785b5b7f1daee08b7b00c2d6813874cda", + "encoded_size": 22, + "relative_url": "/api/fleet/artifacts/endpoint-exceptionlist-macos-v1/d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "compression_algorithm": "zlib" + }, + "endpoint-exceptionlist-windows-v1": { + "encryption_algorithm": 
"none", + "decoded_sha256": "d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "decoded_size": 14, + "encoded_sha256": "f8e6afa1d5662f5b37f83337af774b5785b5b7f1daee08b7b00c2d6813874cda", + "encoded_size": 22, + "relative_url": "/api/fleet/artifacts/endpoint-exceptionlist-windows-v1/d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "compression_algorithm": "zlib" + }, + "endpoint-exceptionlist-linux-v1": { + "encryption_algorithm": "none", + "decoded_sha256": "d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "decoded_size": 14, + "encoded_sha256": "f8e6afa1d5662f5b37f83337af774b5785b5b7f1daee08b7b00c2d6813874cda", + "encoded_size": 22, + "relative_url": "/api/fleet/artifacts/endpoint-exceptionlist-linux-v1/d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "compression_algorithm": "zlib" + }, + "endpoint-trustlist-macos-v1": { + "encryption_algorithm": "none", + "decoded_sha256": "d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "decoded_size": 14, + "encoded_sha256": "f8e6afa1d5662f5b37f83337af774b5785b5b7f1daee08b7b00c2d6813874cda", + "encoded_size": 22, + "relative_url": "/api/fleet/artifacts/endpoint-trustlist-macos-v1/d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "compression_algorithm": "zlib" + }, + "endpoint-trustlist-windows-v1": { + "encryption_algorithm": "none", + "decoded_sha256": "d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "decoded_size": 14, + "encoded_sha256": "f8e6afa1d5662f5b37f83337af774b5785b5b7f1daee08b7b00c2d6813874cda", + "encoded_size": 22, + "relative_url": "/api/fleet/artifacts/endpoint-trustlist-windows-v1/d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "compression_algorithm": "zlib" + }, + "endpoint-trustlist-linux-v1": { + "encryption_algorithm": "none", + "decoded_sha256": "d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "decoded_size": 14, + "encoded_sha256": "f8e6afa1d5662f5b37f83337af774b5785b5b7f1daee08b7b00c2d6813874cda", + "encoded_size": 22, + "relative_url": "/api/fleet/artifacts/endpoint-trustlist-linux-v1/d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "compression_algorithm": "zlib" + }, + "endpoint-eventfilterlist-macos-v1": { + "encryption_algorithm": "none", + "decoded_sha256": "d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "decoded_size": 14, + "encoded_sha256": "f8e6afa1d5662f5b37f83337af774b5785b5b7f1daee08b7b00c2d6813874cda", + "encoded_size": 22, + "relative_url": "/api/fleet/artifacts/endpoint-eventfilterlist-macos-v1/d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "compression_algorithm": "zlib" + }, + "endpoint-eventfilterlist-windows-v1": { + "encryption_algorithm": "none", + "decoded_sha256": "d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "decoded_size": 14, + "encoded_sha256": "f8e6afa1d5662f5b37f83337af774b5785b5b7f1daee08b7b00c2d6813874cda", + "encoded_size": 22, + "relative_url": "/api/fleet/artifacts/endpoint-eventfilterlist-windows-v1/d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "compression_algorithm": "zlib" + }, + "endpoint-eventfilterlist-linux-v1": { + "encryption_algorithm": "none", + "decoded_sha256": "d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "decoded_size": 14, + "encoded_sha256": "f8e6afa1d5662f5b37f83337af774b5785b5b7f1daee08b7b00c2d6813874cda", + "encoded_size": 22, + "relative_url": 
"/api/fleet/artifacts/endpoint-eventfilterlist-linux-v1/d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "compression_algorithm": "zlib" + }, + "endpoint-hostisolationexceptionlist-macos-v1": { + "encryption_algorithm": "none", + "decoded_sha256": "d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "decoded_size": 14, + "encoded_sha256": "f8e6afa1d5662f5b37f83337af774b5785b5b7f1daee08b7b00c2d6813874cda", + "encoded_size": 22, + "relative_url": "/api/fleet/artifacts/endpoint-hostisolationexceptionlist-macos-v1/d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "compression_algorithm": "zlib" + }, + "endpoint-hostisolationexceptionlist-windows-v1": { + "encryption_algorithm": "none", + "decoded_sha256": "d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "decoded_size": 14, + "encoded_sha256": "f8e6afa1d5662f5b37f83337af774b5785b5b7f1daee08b7b00c2d6813874cda", + "encoded_size": 22, + "relative_url": "/api/fleet/artifacts/endpoint-hostisolationexceptionlist-windows-v1/d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "compression_algorithm": "zlib" + }, + "endpoint-hostisolationexceptionlist-linux-v1": { + "encryption_algorithm": "none", + "decoded_sha256": "d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "decoded_size": 14, + "encoded_sha256": "f8e6afa1d5662f5b37f83337af774b5785b5b7f1daee08b7b00c2d6813874cda", + "encoded_size": 22, + "relative_url": "/api/fleet/artifacts/endpoint-hostisolationexceptionlist-linux-v1/d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "compression_algorithm": "zlib" + }, + "endpoint-blocklist-macos-v1": { + "encryption_algorithm": "none", + "decoded_sha256": "d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "decoded_size": 14, + "encoded_sha256": "f8e6afa1d5662f5b37f83337af774b5785b5b7f1daee08b7b00c2d6813874cda", + "encoded_size": 22, + "relative_url": "/api/fleet/artifacts/endpoint-blocklist-macos-v1/d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "compression_algorithm": "zlib" + }, + "endpoint-blocklist-windows-v1": { + "encryption_algorithm": "none", + "decoded_sha256": "d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "decoded_size": 14, + "encoded_sha256": "f8e6afa1d5662f5b37f83337af774b5785b5b7f1daee08b7b00c2d6813874cda", + "encoded_size": 22, + "relative_url": "/api/fleet/artifacts/endpoint-blocklist-windows-v1/d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "compression_algorithm": "zlib" + }, + "endpoint-blocklist-linux-v1": { + "encryption_algorithm": "none", + "decoded_sha256": "d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "decoded_size": 14, + "encoded_sha256": "f8e6afa1d5662f5b37f83337af774b5785b5b7f1daee08b7b00c2d6813874cda", + "encoded_size": 22, + "relative_url": "/api/fleet/artifacts/endpoint-blocklist-linux-v1/d801aa1fb7ddcc330a5e3173372ea6af4a3d08ec58074478e85aa5603e926658", + "compression_algorithm": "zlib" + } + } + } + }, + "policy": { + "value": { + "meta": { + "license": "trial", + "cloud": true + }, + "windows": { + "events": { + "credential_access": true, + "dll_and_driver_load": true, + "dns": true, + "file": true, + "network": true, + "process": true, + "registry": true, + "security": true + }, + "malware": { + "mode": "prevent", + "blocklist": true + }, + "ransomware": { + "mode": "prevent", + "supported": true + }, + "memory_protection": { + "mode": "prevent", + "supported": true + }, + 
"behavior_protection": { + "mode": "prevent", + "supported": true + }, + "popup": { + "malware": { + "message": "", + "enabled": true + }, + "ransomware": { + "message": "", + "enabled": true + }, + "memory_protection": { + "message": "", + "enabled": true + }, + "behavior_protection": { + "message": "", + "enabled": true + } + }, + "logging": { + "file": "info" + }, + "antivirus_registration": { + "enabled": false + }, + "attack_surface_reduction": { + "credential_hardening": { + "enabled": true + } + } + }, + "mac": { + "events": { + "process": true, + "file": true, + "network": true + }, + "malware": { + "mode": "prevent", + "blocklist": true + }, + "behavior_protection": { + "mode": "prevent", + "supported": true + }, + "memory_protection": { + "mode": "prevent", + "supported": true + }, + "popup": { + "malware": { + "message": "", + "enabled": true + }, + "behavior_protection": { + "message": "", + "enabled": true + }, + "memory_protection": { + "message": "", + "enabled": true + } + }, + "logging": { + "file": "info" + }, + "advanced": { + "capture_env_vars": "DYLD_INSERT_LIBRARIES,DYLD_FRAMEWORK_PATH,DYLD_LIBRARY_PATH,LD_PRELOAD" + } + }, + "linux": { + "events": { + "process": true, + "file": true, + "network": true, + "session_data": false, + "tty_io": false + }, + "malware": { + "mode": "prevent", + "blocklist": true + }, + "behavior_protection": { + "mode": "prevent", + "supported": true + }, + "memory_protection": { + "mode": "prevent", + "supported": true + }, + "popup": { + "malware": { + "message": "", + "enabled": true + }, + "behavior_protection": { + "message": "", + "enabled": true + }, + "memory_protection": { + "message": "", + "enabled": true + } + }, + "logging": { + "file": "info" + }, + "advanced": { + "capture_env_vars": "LD_PRELOAD,LD_LIBRARY_PATH" + } + } + } + } + } + } + ] + } diff --git a/x-pack/agentbeat/testing/integration/endpoint_security_test.go b/x-pack/agentbeat/testing/integration/endpoint_security_test.go new file mode 100644 index 000000000000..d53105c0c3f0 --- /dev/null +++ b/x-pack/agentbeat/testing/integration/endpoint_security_test.go @@ -0,0 +1,896 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
+
+//go:build integration
+
+package integration
+
+import (
+	"archive/zip"
+	"context"
+
+	"fmt"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"runtime"
+	"slices"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/gofrs/uuid/v5"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/elastic/elastic-agent-libs/kibana"
+	"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
+	"github.com/elastic/elastic-agent/pkg/control/v2/client"
+	"github.com/elastic/elastic-agent/pkg/control/v2/cproto"
+	atesting "github.com/elastic/elastic-agent/pkg/testing"
+	"github.com/elastic/elastic-agent/pkg/testing/define"
+	"github.com/elastic/elastic-agent/pkg/testing/tools"
+	"github.com/elastic/elastic-agent/pkg/testing/tools/fleettools"
+	"github.com/elastic/elastic-agent/pkg/testing/tools/testcontext"
+)
+
+const (
+	endpointHealthPollingTimeout = 2 * time.Minute
+)
+
+var protectionTests = []struct {
+	name      string
+	protected bool
+}{
+	{
+		name: "unprotected",
+	},
+	{
+		name:      "protected",
+		protected: true,
+	},
+}
+
+// Tests that the agent can install and uninstall the endpoint-security service while remaining
+// healthy.
+//
+// Installing endpoint-security requires a Fleet managed agent with the Elastic Defend integration
+// installed. The endpoint-security service is uninstalled when the agent is uninstalled.
+//
+// The agent is automatically uninstalled as part of test cleanup when installed with
+// fixture.Install via tools.InstallAgentWithPolicy. Failure to uninstall the agent will fail the
+// test automatically.
+func TestInstallAndCLIUninstallWithEndpointSecurity(t *testing.T) {
+	info := define.Require(t, define.Requirements{
+		Group: Fleet,
+		Stack: &define.Stack{},
+		Local: false, // requires Agent installation
+		Sudo:  true,  // requires Agent installation
+		OS: []define.OS{
+			{Type: define.Linux},
+		},
+	})
+
+	for _, tc := range protectionTests {
+		t.Run(tc.name, func(t *testing.T) {
+			testInstallAndCLIUninstallWithEndpointSecurity(t, info, tc.protected)
+		})
+	}
+}
+
+// Tests that the agent can install and uninstall the endpoint-security service while remaining
+// healthy. In this case endpoint-security is uninstalled because the agent was unenrolled, which
+// triggers the creation of an empty agent policy removing all inputs (only when not force
+// unenrolling). The empty agent policy triggers the uninstall of endpoint because endpoint was
+// removed from the policy.
+//
+// Like the CLI uninstall test, the agent is uninstalled from the command line at the end of the test
+// but at this point endpoint is already uninstalled.
+func TestInstallAndUnenrollWithEndpointSecurity(t *testing.T) {
+	info := define.Require(t, define.Requirements{
+		Group: Fleet,
+		Stack: &define.Stack{},
+		Local: false, // requires Agent installation
+		Sudo:  true,  // requires Agent installation
+		OS: []define.OS{
+			{Type: define.Linux},
+		},
+	})
+
+	for _, tc := range protectionTests {
+		t.Run(tc.name, func(t *testing.T) {
+			testInstallAndUnenrollWithEndpointSecurity(t, info, tc.protected)
+		})
+	}
+}
+
+// Tests that the agent can install and uninstall the endpoint-security service
+// after the Elastic Defend integration was removed from the policy
+// while remaining healthy.
+//
+// Installing endpoint-security requires a Fleet managed agent with the Elastic Defend integration
+// installed. The endpoint-security service is uninstalled when the Elastic Defend integration is removed from the policy.
+//
+// Like the CLI uninstall test, the agent is uninstalled from the command line at the end of the test
+// but at this point endpoint should already be uninstalled.
+
+func TestInstallWithEndpointSecurityAndRemoveEndpointIntegration(t *testing.T) {
+	info := define.Require(t, define.Requirements{
+		Group: Fleet,
+		Stack: &define.Stack{},
+		Local: false, // requires Agent installation
+		Sudo:  true,  // requires Agent installation
+		OS: []define.OS{
+			{Type: define.Linux},
+		},
+	})
+
+	for _, tc := range protectionTests {
+		t.Run(tc.name, func(t *testing.T) {
+			testInstallWithEndpointSecurityAndRemoveEndpointIntegration(t, info, tc.protected)
+		})
+	}
+}
+
+// installSecurityAgent is a helper function to install an elastic-agent in privileged mode with the force+non-interactive flags.
+// The policy the agent is enrolled with can have tamper protection enabled via the protected parameter.
+func installSecurityAgent(ctx context.Context, t *testing.T, info *define.Info, protected bool) (*atesting.Fixture, kibana.PolicyResponse) {
+	t.Helper()
+
+	// Get path to agent executable.
+	fixture, err := define.NewFixtureFromLocalBuild(t, define.Version())
+	require.NoError(t, err, "could not create agent fixture")
+
+	t.Log("Enrolling the agent in Fleet")
+	policyUUID := uuid.Must(uuid.NewV4()).String()
+
+	createPolicyReq := buildPolicyWithTamperProtection(
+		kibana.AgentPolicy{
+			Name:        "test-policy-" + policyUUID,
+			Namespace:   "default",
+			Description: "Test policy " + policyUUID,
+			MonitoringEnabled: []kibana.MonitoringEnabledOption{
+				kibana.MonitoringEnabledLogs,
+				kibana.MonitoringEnabledMetrics,
+			},
+		},
+		protected,
+	)
+
+	installOpts := atesting.InstallOpts{
+		NonInteractive: true,
+		Force:          true,
+		Privileged:     true,
+	}
+
+	policy, err := tools.InstallAgentWithPolicy(ctx, t,
+		installOpts, fixture, info.KibanaClient, createPolicyReq)
+	require.NoError(t, err, "failed to install agent with policy")
+	return fixture, policy
+}
+
+// buildPolicyWithTamperProtection is a helper function to build the policy request with or without tamper protection.
+func buildPolicyWithTamperProtection(policy kibana.AgentPolicy, protected bool) kibana.AgentPolicy {
+	if protected {
+		policy.AgentFeatures = append(policy.AgentFeatures, map[string]interface{}{
+			"name":    "tamper_protection",
+			"enabled": true,
+		})
+	}
+	policy.IsProtected = protected
+	return policy
+}
+
+func testInstallAndCLIUninstallWithEndpointSecurity(t *testing.T, info *define.Info, protected bool) {
+	deadline := time.Now().Add(10 * time.Minute)
+	ctx, cancel := testcontext.WithDeadline(t, context.Background(), deadline)
+	defer cancel()
+
+	fixture, policy := installSecurityAgent(ctx, t, info, protected)
+
+	t.Cleanup(func() {
+		t.Log("Un-enrolling Elastic Agent...")
+		// Use a separate context as the one in the test body will have been cancelled at this point.
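+		// The one-minute timeout bounds the unenroll request so a hung
+		// cleanup cannot stall the rest of the suite.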
+ cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), time.Minute) + defer cleanupCancel() + assert.NoError(t, fleettools.UnEnrollAgent(cleanupCtx, info.KibanaClient, policy.ID)) + }) + + t.Log("Installing Elastic Defend") + pkgPolicyResp, err := installElasticDefendPackage(t, info, policy.ID) + require.NoErrorf(t, err, "Policy Response was: %v", pkgPolicyResp) + + t.Log("Polling for endpoint-security to become Healthy") + ctx, cancel = context.WithTimeout(ctx, endpointHealthPollingTimeout) + defer cancel() + + agentClient := fixture.Client() + err = agentClient.Connect(ctx) + require.NoError(t, err, "could not connect to local agent") + + require.Eventually(t, + func() bool { return agentAndEndpointAreHealthy(t, ctx, agentClient) }, + endpointHealthPollingTimeout, + time.Second, + "Endpoint component or units are not healthy.", + ) + t.Log("Verified endpoint component and units are healthy") +} + +func testInstallAndUnenrollWithEndpointSecurity(t *testing.T, info *define.Info, protected bool) { + ctx, cn := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cn() + + fixture, policy := installSecurityAgent(ctx, t, info, protected) + + t.Log("Installing Elastic Defend") + _, err := installElasticDefendPackage(t, info, policy.ID) + require.NoError(t, err) + + t.Log("Polling for endpoint-security to become Healthy") + ctx, cancel := context.WithTimeout(context.Background(), endpointHealthPollingTimeout) + defer cancel() + + agentClient := fixture.Client() + err = agentClient.Connect(ctx) + require.NoError(t, err) + + require.Eventually(t, + func() bool { return agentAndEndpointAreHealthy(t, ctx, agentClient) }, + endpointHealthPollingTimeout, + time.Second, + "Endpoint component or units are not healthy.", + ) + t.Log("Verified endpoint component and units are healthy") + + // Unenroll the agent + t.Log("Unenrolling the agent") + + hostname, err := os.Hostname() + require.NoError(t, err) + + agentID, err := fleettools.GetAgentIDByHostname(ctx, info.KibanaClient, policy.ID, hostname) + require.NoError(t, err) + + _, err = info.KibanaClient.UnEnrollAgent(ctx, kibana.UnEnrollAgentRequest{ID: agentID}) + require.NoError(t, err) + + t.Log("Waiting for inputs to stop") + require.Eventually(t, + func() bool { + state, err := agentClient.State(ctx) + if err != nil { + t.Logf("Error getting agent state: %s", err) + return false + } + + if state.State != client.Healthy { + t.Logf("Agent is not Healthy\n%+v", state) + return false + } + + if len(state.Components) != 0 { + t.Logf("Components have not been stopped and uninstalled!\n%+v", state) + return false + } + + return true + }, + endpointHealthPollingTimeout, + time.Second, + "All components not removed.", + ) + t.Log("Verified endpoint component and units are removed") + + // Verify that the Endpoint directory was correctly removed. 
+	// Regression test for https://github.com/elastic/elastic-agent/issues/3077
+	agentInstallPath := fixture.WorkDir()
+	files, err := os.ReadDir(filepath.Clean(filepath.Join(agentInstallPath, "..")))
+	require.NoError(t, err)
+
+	t.Logf("Checking directories at install path %s", agentInstallPath)
+	for _, f := range files {
+		if !f.IsDir() {
+			continue
+		}
+
+		t.Log("Found directory", f.Name())
+		require.False(t, strings.Contains(f.Name(), "Endpoint"), "Endpoint directory was not removed")
+	}
+}
+
+func testInstallWithEndpointSecurityAndRemoveEndpointIntegration(t *testing.T, info *define.Info, protected bool) {
+	ctx, cn := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute))
+	defer cn()
+
+	fixture, policy := installSecurityAgent(ctx, t, info, protected)
+
+	t.Log("Installing Elastic Defend")
+	pkgPolicyResp, err := installElasticDefendPackage(t, info, policy.ID)
+	require.NoErrorf(t, err, "Policy Response was: %#v", pkgPolicyResp)
+
+	t.Log("Polling for endpoint-security to become Healthy")
+	ctx, cancel := context.WithTimeout(context.Background(), endpointHealthPollingTimeout)
+	defer cancel()
+
+	agentClient := fixture.Client()
+	err = agentClient.Connect(ctx)
+	require.NoError(t, err)
+
+	require.Eventually(t,
+		func() bool { return agentAndEndpointAreHealthy(t, ctx, agentClient) },
+		endpointHealthPollingTimeout,
+		time.Second,
+		"Endpoint component or units are not healthy.",
+	)
+	t.Log("Verified endpoint component and units are healthy")
+
+	t.Logf("Removing Elastic Defend: %v", fmt.Sprintf("/api/fleet/package_policies/%v", pkgPolicyResp.Item.ID))
+	_, err = info.KibanaClient.DeleteFleetPackage(ctx, pkgPolicyResp.Item.ID)
+	require.NoError(t, err)
+
+	t.Log("Waiting for endpoint to stop")
+	require.Eventually(t,
+		func() bool { return agentIsHealthyNoEndpoint(t, ctx, agentClient) },
+		endpointHealthPollingTimeout,
+		time.Second,
+		"Endpoint component or units are still present.",
+	)
+	t.Log("Verified endpoint component and units are removed")
+
+	// Verify that the Endpoint directory was correctly removed.
+	// Regression test for https://github.com/elastic/elastic-agent/issues/3077
+	agentInstallPath := fixture.WorkDir()
+	files, err := os.ReadDir(filepath.Clean(filepath.Join(agentInstallPath, "..")))
+	require.NoError(t, err)
+
+	t.Logf("Checking directories at install path %s", agentInstallPath)
+	for _, f := range files {
+		if !f.IsDir() {
+			continue
+		}
+
+		t.Log("Found directory", f.Name())
+		// If Endpoint was not correctly removed, let's see what was left behind.
+		if strings.Contains(f.Name(), "Endpoint") {
+			info, err := f.Info()
+			if err != nil {
+				t.Logf("could not get file info for %q to check what was left behind: %v", f.Name(), err)
+			}
+			ls, err := os.ReadDir(filepath.Join(agentInstallPath, "..", info.Name()))
+			if err != nil {
+				t.Logf("could not list files for %q to check what was left behind: %v", f.Name(), err)
+			}
+			var dirEntries []string
+			for _, de := range ls {
+				dirEntries = append(dirEntries, de.Name())
+			}
+
+			if len(dirEntries) == 0 {
+				t.Fatalf("Endpoint directory was not removed, but it is empty")
+			}
+			t.Fatalf("Endpoint directory was not removed, the directory content is: %s",
+				strings.Join(dirEntries, ", "))
+		}
+	}
+}
+
+// This is a subset of kibana.AgentPolicyUpdateRequest, used until elastic-agent-libs PR https://github.com/elastic/elastic-agent-libs/pull/141 is merged
+// TODO: replace with the elastic-agent-libs type when available
+type agentPolicyUpdateRequest struct {
+	// Name of the policy. Required in an update request.
+ Name string `json:"name"` + // Namespace of the policy. Required in an update request. + Namespace string `json:"namespace"` + IsProtected bool `json:"is_protected"` +} + +// Tests that install of Elastic Defend fails if Agent is installed in a base +// path other than default +func TestEndpointSecurityNonDefaultBasePath(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Fleet, + Stack: &define.Stack{}, + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + }) + + ctx, cn := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cn() + + // Get path to agent executable. + fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + t.Log("Enrolling the agent in Fleet") + policyUUID := uuid.Must(uuid.NewV4()).String() + createPolicyReq := kibana.AgentPolicy{ + Name: "test-policy-" + policyUUID, + Namespace: "default", + Description: "Test policy " + policyUUID, + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + } + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Force: true, + Privileged: true, + BasePath: filepath.Join(paths.DefaultBasePath, "not_default"), + } + policyResp, err := tools.InstallAgentWithPolicy(ctx, t, installOpts, fixture, info.KibanaClient, createPolicyReq) + require.NoErrorf(t, err, "Policy Response was: %v", policyResp) + + t.Log("Installing Elastic Defend") + pkgPolicyResp, err := installElasticDefendPackage(t, info, policyResp.ID) + require.NoErrorf(t, err, "Policy Response was: %v", pkgPolicyResp) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + c := fixture.Client() + + require.Eventually(t, func() bool { + err := c.Connect(ctx) + if err != nil { + t.Logf("connecting client to agent: %v", err) + return false + } + defer c.Disconnect() + state, err := c.State(ctx) + if err != nil { + t.Logf("error getting the agent state: %v", err) + return false + } + t.Logf("agent state: %+v", state) + if state.State != cproto.State_DEGRADED { + return false + } + for _, c := range state.Components { + if strings.Contains(c.Message, + "Elastic Defend requires Elastic Agent be installed at the default installation path") { + return true + } + } + return false + }, 2*time.Minute, 10*time.Second, "Agent never became DEGRADED with default install message") +} + +// Tests that install of Elastic Defend fails if Agent is installed unprivileged. +func TestEndpointSecurityUnprivileged(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Fleet, + Stack: &define.Stack{}, + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + + // Only supports Linux at the moment. + OS: []define.OS{ + { + Type: define.Linux, + }, + }, + }) + + ctx, cn := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cn() + + // Get path to agent executable. 
+ fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + t.Log("Enrolling the agent in Fleet") + policyUUID := uuid.Must(uuid.NewV4()).String() + createPolicyReq := kibana.AgentPolicy{ + Name: "test-policy-" + policyUUID, + Namespace: "default", + Description: "Test policy " + policyUUID, + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + } + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Force: true, + Privileged: false, // ensure always unprivileged + } + policyResp, err := tools.InstallAgentWithPolicy(ctx, t, installOpts, fixture, info.KibanaClient, createPolicyReq) + require.NoErrorf(t, err, "Policy Response was: %v", policyResp) + + t.Log("Installing Elastic Defend") + pkgPolicyResp, err := installElasticDefendPackage(t, info, policyResp.ID) + require.NoErrorf(t, err, "Policy Response was: %v", pkgPolicyResp) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + c := fixture.Client() + + errMsg := "Elastic Defend requires Elastic Agent be running as root" + if runtime.GOOS == define.Windows { + errMsg = "Elastic Defend requires Elastic Agent be running as Administrator or SYSTEM" + } + require.Eventually(t, func() bool { + err := c.Connect(ctx) + if err != nil { + t.Logf("connecting client to agent: %v", err) + return false + } + defer c.Disconnect() + state, err := c.State(ctx) + if err != nil { + t.Logf("error getting the agent state: %v", err) + return false + } + t.Logf("agent state: %+v", state) + if state.State != cproto.State_DEGRADED { + return false + } + for _, c := range state.Components { + if strings.Contains(c.Message, errMsg) { + return true + } + } + return false + }, 2*time.Minute, 10*time.Second, "Agent never became DEGRADED with root/Administrator install message") +} + +// Tests that trying to switch from privileged to unprivileged with Elastic Defend fails. +func TestEndpointSecurityCannotSwitchToUnprivileged(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Fleet, + Stack: &define.Stack{}, + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + + // Only supports Linux at the moment. + OS: []define.OS{ + { + Type: define.Linux, + }, + }, + }) + + ctx, cn := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cn() + + // Get path to agent executable. 
+ fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + t.Log("Enrolling the agent in Fleet") + policyUUID := uuid.Must(uuid.NewV4()).String() + createPolicyReq := kibana.AgentPolicy{ + Name: "test-policy-" + policyUUID, + Namespace: "default", + Description: "Test policy " + policyUUID, + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + } + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Force: true, + Privileged: true, // ensure always privileged + } + policyResp, err := tools.InstallAgentWithPolicy(ctx, t, installOpts, fixture, info.KibanaClient, createPolicyReq) + require.NoErrorf(t, err, "Policy Response was: %v", policyResp) + + t.Log("Installing Elastic Defend") + pkgPolicyResp, err := installElasticDefendPackage(t, info, policyResp.ID) + require.NoErrorf(t, err, "Policy Response was: %v", pkgPolicyResp) + + t.Log("Polling for endpoint-security to become Healthy") + healthyCtx, cancel := context.WithTimeout(ctx, endpointHealthPollingTimeout) + defer cancel() + + agentClient := fixture.Client() + err = agentClient.Connect(healthyCtx) + require.NoError(t, err) + + require.Eventually(t, + func() bool { return agentAndEndpointAreHealthy(t, healthyCtx, agentClient) }, + endpointHealthPollingTimeout, + time.Second, + "Endpoint component or units are not healthy.", + ) + t.Log("Verified endpoint component and units are healthy") + + performSwitchCtx, cancel := context.WithTimeout(ctx, 2*time.Minute) + defer cancel() + output, err := fixture.Exec(performSwitchCtx, []string{"unprivileged", "-f"}) + require.Errorf(t, err, "unprivileged command should have failed") + assert.Contains(t, string(output), "unable to switch to unprivileged mode due to the following service based components having issues") + assert.Contains(t, string(output), "endpoint") +} + +// TestEndpointLogsAreCollectedInDiagnostics tests that diagnostics archive contain endpoint logs +func TestEndpointLogsAreCollectedInDiagnostics(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Fleet, + Stack: &define.Stack{}, + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + OS: []define.OS{ + {Type: define.Linux}, + }, + }) + + ctx, cn := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cn() + + // Get path to agent executable. + fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + t.Log("Enrolling the agent in Fleet") + policyUUID := uuid.Must(uuid.NewV4()).String() + createPolicyReq := kibana.AgentPolicy{ + Name: "test-policy-" + policyUUID, + Namespace: "default", + Description: "Test policy " + policyUUID, + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + } + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Force: true, + Privileged: true, + } + + policyResp, err := tools.InstallAgentWithPolicy(ctx, t, installOpts, fixture, info.KibanaClient, createPolicyReq) + require.NoErrorf(t, err, "Policy Response was: %v", policyResp) + + t.Cleanup(func() { + t.Log("Un-enrolling Elastic Agent...") + // Use a separate context as the one in the test body will have been cancelled at this point. 
+ cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), time.Minute) + defer cleanupCancel() + assert.NoError(t, fleettools.UnEnrollAgent(cleanupCtx, info.KibanaClient, policyResp.ID)) + }) + + t.Log("Installing Elastic Defend") + pkgPolicyResp, err := installElasticDefendPackage(t, info, policyResp.ID) + require.NoErrorf(t, err, "Policy Response was: %v", pkgPolicyResp) + + // wait for endpoint to be healthy + t.Log("Polling for endpoint-security to become Healthy") + pollingCtx, pollingCancel := context.WithTimeout(ctx, endpointHealthPollingTimeout) + defer pollingCancel() + + require.Eventually(t, + func() bool { + agentClient := fixture.Client() + err = agentClient.Connect(ctx) + if err != nil { + t.Logf("error connecting to agent: %v", err) + return false + } + defer agentClient.Disconnect() + return agentAndEndpointAreHealthy(t, pollingCtx, agentClient) + }, + endpointHealthPollingTimeout, + time.Second, + "Endpoint component or units are not healthy.", + ) + + // get endpoint component name + endpointComponents := getEndpointComponents(ctx, t, fixture.Client()) + require.NotEmpty(t, endpointComponents, "there should be at least one endpoint component") + + t.Logf("endpoint components: %v", endpointComponents) + + outDir := t.TempDir() + diagFile := t.Name() + ".zip" + diagAbsPath := filepath.Join(outDir, diagFile) + _, err = fixture.Exec(ctx, []string{"diagnostics", "-f", diagAbsPath}) + require.NoError(t, err, "diagnostics command failed") + require.FileExists(t, diagAbsPath, "diagnostic archive should have been created") + checkDiagnosticsForEndpointFiles(t, diagAbsPath, endpointComponents) +} + +func getEndpointComponents(ctx context.Context, t *testing.T, c client.Client) []string { + + err := c.Connect(ctx) + require.NoError(t, err, "connecting to agent to retrieve endpoint components") + defer c.Disconnect() + + agentState, err := c.State(ctx) + require.NoError(t, err, "retrieving agent state") + + var endpointComponents []string + for _, componentState := range agentState.Components { + if strings.Contains(componentState.Name, "endpoint") { + endpointComponents = append(endpointComponents, componentState.ID) + } + } + return endpointComponents +} + +func checkDiagnosticsForEndpointFiles(t *testing.T, diagsPath string, endpointComponents []string) { + zipReader, err := zip.OpenReader(diagsPath) + require.NoError(t, err, "error opening diagnostics archive") + + defer func(zipReader *zip.ReadCloser) { + err := zipReader.Close() + assert.NoError(t, err, "error closing diagnostic archive") + }(zipReader) + + t.Logf("---- Contents of diagnostics archive") + for _, file := range zipReader.File { + t.Logf("%q - %+v", file.Name, file.FileHeader.FileInfo()) + } + t.Logf("---- End contents of diagnostics archive") + // check there are files under the components/ directory + for _, componentName := range endpointComponents { + endpointComponentDirName := fmt.Sprintf("components/%s", componentName) + endpointComponentDir, err := zipReader.Open(endpointComponentDirName) + if assert.NoErrorf(t, err, "error looking up directory %q for endpoint component %q in diagnostic archive: %v", endpointComponentDirName, componentName, err) { + defer func(endpointComponentDir fs.File) { + err := endpointComponentDir.Close() + if err != nil { + assert.NoError(t, err, "error closing endpoint component directory") + } + }(endpointComponentDir) + if assert.Implementsf(t, (*fs.ReadDirFile)(nil), endpointComponentDir, "endpoint component %q should have a directory in the diagnostic archive 
under %s", componentName, endpointComponentDirName) { + dirFile := endpointComponentDir.(fs.ReadDirFile) + endpointFiles, err := dirFile.ReadDir(-1) + assert.NoErrorf(t, err, "error reading endpoint component %q directory %q in diagnostic archive", componentName, endpointComponentDirName) + assert.NotEmptyf(t, endpointFiles, "endpoint component %q directory should not be empty", componentName) + } + } + } + + // check endpoint logs + servicesLogDirName := "logs/services" + servicesLogDir, err := zipReader.Open(servicesLogDirName) + if assert.NoErrorf(t, err, "error looking up directory %q in diagnostic archive: %v", servicesLogDirName, err) { + defer func(servicesLogDir fs.File) { + err := servicesLogDir.Close() + if err != nil { + assert.NoError(t, err, "error closing services logs directory") + } + }(servicesLogDir) + if assert.Implementsf(t, (*fs.ReadDirFile)(nil), servicesLogDir, "service logs should be in a directory in the diagnostic archive under %s", servicesLogDir) { + dirFile := servicesLogDir.(fs.ReadDirFile) + servicesLogFiles, err := dirFile.ReadDir(-1) + assert.NoError(t, err, "error reading services logs directory %q in diagnostic archive", servicesLogDirName) + assert.True(t, + slices.ContainsFunc(servicesLogFiles, + func(entry fs.DirEntry) bool { + return strings.HasPrefix(entry.Name(), "endpoint-") && strings.HasSuffix(entry.Name(), ".log") + }), + "service logs should contain endpoint-*.log files", + ) + } + } +} + +func agentIsHealthyNoEndpoint(t *testing.T, ctx context.Context, agentClient client.Client) bool { + t.Helper() + + state, err := agentClient.State(ctx) + if err != nil { + t.Logf("Error getting agent state: %s", err) + return false + } + + if state.State != client.Healthy { + t.Logf("Agent is not Healthy\n%+v", state) + return false + } + + foundEndpointComponent := false + foundEndpointInputUnit := false + foundEndpointOutputUnit := false + for _, comp := range state.Components { + isEndpointComponent := strings.Contains(comp.Name, "endpoint") + if isEndpointComponent { + foundEndpointComponent = true + } + if comp.State != client.Healthy { + t.Logf("Component is not Healthy\n%+v", comp) + return false + } + + for _, unit := range comp.Units { + if isEndpointComponent { + if unit.UnitType == client.UnitTypeInput { + foundEndpointInputUnit = true + } + if unit.UnitType == client.UnitTypeOutput { + foundEndpointOutputUnit = true + } + } + + if unit.State != client.Healthy { + t.Logf("Unit is not Healthy\n%+v", unit) + return false + } + } + } + + // Ensure both the endpoint input and output units were found and healthy. + if foundEndpointComponent || foundEndpointInputUnit || foundEndpointOutputUnit { + t.Logf("State did contain endpoint or endpoint units!\n%+v", state) + return false + } + + return true +} + +// TestForceInstallOverProtectedPolicy tests that running `elastic-agent install -f` +// when an installed agent is running a policy with tamper protection enabled fails. 
+func TestForceInstallOverProtectedPolicy(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Fleet, + Stack: &define.Stack{}, + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + OS: []define.OS{ + {Type: define.Linux}, + }, + }) + + deadline := time.Now().Add(10 * time.Minute) + ctx, cancel := testcontext.WithDeadline(t, context.Background(), deadline) + defer cancel() + + fixture, policy := installSecurityAgent(ctx, t, info, true) + + t.Cleanup(func() { + t.Log("Un-enrolling Elastic Agent...") + // Use a separate context as the one in the test body will have been cancelled at this point. + cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), time.Minute) + defer cleanupCancel() + assert.NoError(t, fleettools.UnEnrollAgent(cleanupCtx, info.KibanaClient, policy.ID)) + }) + + t.Log("Installing Elastic Defend") + pkgPolicyResp, err := installElasticDefendPackage(t, info, policy.ID) + require.NoErrorf(t, err, "Policy Response was: %v", pkgPolicyResp) + + t.Log("Polling for endpoint-security to become Healthy") + ctx, cancel = context.WithTimeout(ctx, endpointHealthPollingTimeout) + defer cancel() + + agentClient := fixture.Client() + err = agentClient.Connect(ctx) + require.NoError(t, err, "could not connect to local agent") + + require.Eventually(t, + func() bool { return agentAndEndpointAreHealthy(t, ctx, agentClient) }, + endpointHealthPollingTimeout, + time.Second, + "Endpoint component or units are not healthy.", + ) + t.Log("Verified endpoint component and units are healthy") + + t.Log("Run elastic-agent install -f...") + // We use the same policy with tamper protection enabled for this test and expect it to fail. + token, err := info.KibanaClient.CreateEnrollmentAPIKey(ctx, kibana.CreateEnrollmentAPIKeyRequest{ + PolicyID: policy.ID, + }) + require.NoError(t, err) + url, err := fleettools.DefaultURL(ctx, info.KibanaClient) + require.NoError(t, err) + + args := []string{ + "install", + "--force", + "--url", + url, + "--enrollment-token", + token.APIKey, + } + out, err := fixture.Exec(ctx, args) + require.Errorf(t, err, "No error detected, command output: %s", out) +} diff --git a/x-pack/agentbeat/testing/integration/endpoint_test_tools.go b/x-pack/agentbeat/testing/integration/endpoint_test_tools.go new file mode 100644 index 000000000000..09c4bbf17d80 --- /dev/null +++ b/x-pack/agentbeat/testing/integration/endpoint_test_tools.go @@ -0,0 +1,133 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
+ +//go:build integration + +package integration + +import ( + "bytes" + "context" + _ "embed" + "encoding/json" + "fmt" + "strings" + "testing" + "text/template" + "time" + + "github.com/gofrs/uuid/v5" + + "github.com/elastic/elastic-agent-libs/kibana" + "github.com/elastic/elastic-agent/pkg/control/v2/client" + "github.com/elastic/elastic-agent/pkg/testing/define" +) + +//go:embed endpoint_security_package.json.tmpl +var endpointPackagePolicyTemplate string + +type endpointPackageTemplateVars struct { + ID string + Name string + PolicyID string + Version string +} + +// TODO: Setup a GitHub Action to update this for each release of https://github.com/elastic/endpoint-package +const endpointPackageVersion = "8.11.0" + +func agentAndEndpointAreHealthy(t *testing.T, ctx context.Context, agentClient client.Client) bool { + t.Helper() + + state, err := agentClient.State(ctx) + if err != nil { + t.Logf("Error getting agent state: %s", err) + return false + } + + if state.State != client.Healthy { + t.Logf("local Agent is not Healthy: current state: %+v", state) + return false + } + + foundEndpointInputUnit := false + foundEndpointOutputUnit := false + for _, comp := range state.Components { + isEndpointComponent := strings.Contains(comp.Name, "endpoint") + if comp.State != client.Healthy { + t.Logf("endpoint component is not Healthy: current state: %+v", comp) + return false + } + + for _, unit := range comp.Units { + if isEndpointComponent { + if unit.UnitType == client.UnitTypeInput { + foundEndpointInputUnit = true + } + if unit.UnitType == client.UnitTypeOutput { + foundEndpointOutputUnit = true + } + } + + if unit.State != client.Healthy { + t.Logf("unit %q is not Healthy\n%+v", unit.UnitID, unit) + return false + } + } + } + + // Ensure both the endpoint input and output units were found and healthy. + if !foundEndpointInputUnit || !foundEndpointOutputUnit { + t.Logf("State did not contain endpoint units (input: %v/output: %v) state: %+v. ", foundEndpointInputUnit, foundEndpointOutputUnit, state) + return false + } + + return true +} + +// Installs the Elastic Defend package to cause the agent to install the endpoint-security service. +func installElasticDefendPackage(t *testing.T, info *define.Info, policyID string) (r kibana.PackagePolicyResponse, err error) { + t.Helper() + + t.Log("Templating endpoint package policy request") + tmpl, err := template.New("pkgpolicy").Parse(endpointPackagePolicyTemplate) + if err != nil { + return r, fmt.Errorf("error creating new template: %w", err) + } + + packagePolicyID := uuid.Must(uuid.NewV4()).String() + var pkgPolicyBuf bytes.Buffer + + // Need unique name for Endpoint integration otherwise on multiple runs on the same instance you get + // http error response with code 409: {StatusCode:409 Error:Conflict Message:An integration policy with the name Defend-cbomziz4uvn5fov9t1gsrcvdwn2p1s7tefnvgsye already exists. Please rename it or choose a different name.} + err = tmpl.Execute(&pkgPolicyBuf, endpointPackageTemplateVars{ + ID: packagePolicyID, + Name: "Defend-" + packagePolicyID, + PolicyID: policyID, + Version: endpointPackageVersion, + }) + if err != nil { + return r, fmt.Errorf("error executing template: %w", err) + } + + // Make sure the templated value is actually valid JSON before making the API request. + // Using json.Unmarshal will give us the actual syntax error, calling json.Valid() would not. 
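+	// (json.Valid only reports a bool, while a failed json.Unmarshal returns
+	// a *json.SyntaxError that includes the offset of the first problem.)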
+	packagePolicyReq := kibana.PackagePolicyRequest{}
+	err = json.Unmarshal(pkgPolicyBuf.Bytes(), &packagePolicyReq)
+	if err != nil {
+		return r, fmt.Errorf("templated package policy is not valid JSON: %s, %w", pkgPolicyBuf.String(), err)
+	}
+
+	t.Log("POST /api/fleet/package_policies")
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	defer cancel()
+
+	pkgResp, err := info.KibanaClient.InstallFleetPackage(ctx, packagePolicyReq)
+	if err != nil {
+		t.Logf("Error installing fleet package: %v", err)
+		return r, fmt.Errorf("error installing fleet package: %w", err)
+	}
+	t.Logf("Endpoint package Policy Response:\n%+v", pkgResp)
+	return pkgResp, err
+}
diff --git a/x-pack/agentbeat/testing/integration/event_logging_test.go b/x-pack/agentbeat/testing/integration/event_logging_test.go
new file mode 100644
index 000000000000..90397c5af683
--- /dev/null
+++ b/x-pack/agentbeat/testing/integration/event_logging_test.go
@@ -0,0 +1,404 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License 2.0;
+// you may not use this file except in compliance with the Elastic License 2.0.
+
+//go:build integration
+
+package integration
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"fmt"
+	"net/http"
+	"net/http/httputil"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/gofrs/uuid/v5"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	atesting "github.com/elastic/elastic-agent/pkg/testing"
+	"github.com/elastic/elastic-agent/pkg/testing/define"
+	"github.com/elastic/elastic-agent/pkg/testing/tools/fleettools"
+	"github.com/elastic/elastic-agent/pkg/testing/tools/testcontext"
+)
+
+var eventLogConfig = `
+outputs:
+  default:
+    type: elasticsearch
+    hosts:
+      - %s
+    protocol: http
+    preset: balanced
+
+inputs:
+  - type: filestream
+    id: your-input-id
+    streams:
+      - id: your-filestream-stream-id
+        data_stream:
+          dataset: generic
+        paths:
+          - %s
+
+# Disable monitoring so there are fewer Beats running and fewer logs being
+# generated. The monitoring HTTP server and the gRPC address below use
+# non-default ports: that is needed if you already have an Elastic-Agent
+# running on your machine and is very helpful for running the tests locally.
+agent.monitoring:
+  enabled: false
+  logs: false
+  metrics: false
+  pprof.enabled: false
+  use_output: default
+  http:
+    enabled: false
+    port: 7002
+
+agent.grpc:
+  address: localhost
+  port: 7001
+`
+
+func TestEventLogFile(t *testing.T) {
+	_ = define.Require(t, define.Requirements{
+		Group: Default,
+		Stack: &define.Stack{},
+		Local: true,
+		Sudo:  false,
+	})
+	ctx, cancel := testcontext.WithDeadline(
+		t,
+		context.Background(),
+		time.Now().Add(10*time.Minute))
+	defer cancel()
+
+	agentFixture, err := define.NewFixtureFromLocalBuild(t, define.Version())
+	require.NoError(t, err)
+
+	esURL := startMockES(t)
+
+	logFilepath := path.Join(t.TempDir(), t.Name())
+	generateLogFile(t, logFilepath, time.Millisecond*100, 1)
+
+	cfg := fmt.Sprintf(eventLogConfig, esURL, logFilepath)
+
+	if err := agentFixture.Prepare(ctx); err != nil {
+		t.Fatalf("cannot prepare Elastic-Agent fixture: %s", err)
+	}
+
+	if err := agentFixture.Configure(ctx, []byte(cfg)); err != nil {
+		t.Fatalf("cannot configure Elastic-Agent fixture: %s", err)
+	}
+
+	cmd, err := agentFixture.PrepareAgentCommand(ctx, nil)
+	if err != nil {
+		t.Fatalf("cannot prepare Elastic-Agent command: %s", err)
+	}
+
+	output := strings.Builder{}
+	cmd.Stderr = &output
+	cmd.Stdout = &output
+
+	if err := cmd.Start(); err != nil {
+		t.Fatalf("could not start Elastic-Agent: %s", err)
+	}
+
+	// Make sure the Elastic-Agent process is not running before
+	// exiting the test
+	t.Cleanup(func() {
+		// Ignore the error because we cancelled the context,
+		// and that always returns an error
+		_ = cmd.Wait()
+		if t.Failed() {
+			t.Log("Elastic-Agent output:")
+			t.Log(output.String())
+		}
+	})
+
+	// Now the Elastic-Agent is running, so validate the Event log file.
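+	// The mock ES server (startMockES above) is assumed to reject ingestion,
+	// so the Beats started by the agent should log "Cannot index event"
+	// errors; the event logger routes those into the separate events log file
+	// validated below.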
+	requireEventLogFileExistsWithData(t, agentFixture)
+
+	// The diagnostics command is already tested by another test;
+	// here we just want to validate the events log behaviour:
+	// extract the zip file into a temp folder and verify its logs.
+	expectedLogFiles, expectedEventLogFiles := getLogFilenames(
+		t,
+		filepath.Join(agentFixture.WorkDir(),
+			"data",
+			"elastic-agent-*",
+			"logs"))
+
+	collectDiagnosticsAndVerifyLogs(
+		t,
+		ctx,
+		agentFixture,
+		[]string{"diagnostics", "collect"},
+		append(expectedLogFiles, expectedEventLogFiles...))
+
+	collectDiagnosticsAndVerifyLogs(
+		t,
+		ctx,
+		agentFixture,
+		[]string{"diagnostics", "collect", "--exclude-events"},
+		expectedLogFiles)
+}
+
+func TestEventLogOutputConfiguredViaFleet(t *testing.T) {
+	info := define.Require(t, define.Requirements{
+		Stack: &define.Stack{},
+		Local: false,
+		Sudo:  true,
+		OS: []define.OS{
+			{Type: define.Linux},
+		},
+		Group: "container",
+	})
+	t.Skip("Flaky test: https://github.com/elastic/elastic-agent/issues/5159")
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
+	defer cancel()
+
+	agentFixture, err := define.NewFixtureFromLocalBuild(t, define.Version())
+	require.NoError(t, err)
+
+	_, outputID := createMockESOutput(t, info)
+	policyName := fmt.Sprintf("%s-%s", t.Name(), uuid.Must(uuid.NewV4()).String())
+	policyID, enrollmentAPIKey := createPolicy(
+		t,
+		ctx,
+		agentFixture,
+		info,
+		policyName,
+		outputID)
+
+	fleetURL, err := fleettools.DefaultURL(ctx, info.KibanaClient)
+	if err != nil {
+		t.Fatalf("could not get Fleet URL: %s", err)
+	}
+
+	enrollArgs := []string{
+		"enroll",
+		"--force",
+		"--skip-daemon-reload",
+		"--url",
+		fleetURL,
+		"--enrollment-token",
+		enrollmentAPIKey,
+	}
+
+	addLogIntegration(t, info, policyID, "/tmp/flog.log")
+	generateLogFile(t, "/tmp/flog.log", time.Second/2, 100)
+
+	enrollCmd, err := agentFixture.PrepareAgentCommand(ctx, enrollArgs)
+	if err != nil {
+		t.Fatalf("could not prepare enroll command: %s", err)
+	}
+	if out, err := enrollCmd.CombinedOutput(); err != nil {
+		t.Fatalf("error enrolling Elastic-Agent: %s\nOutput:\n%s", err, string(out))
+	}
+
+	runAgentCMD, agentOutput := prepareAgentCMD(t, ctx, agentFixture, nil, nil)
+	if err := runAgentCMD.Start(); err != nil {
+		t.Fatalf("could not start Elastic-Agent: %s", err)
+	}
+
+	assert.Eventuallyf(t, func() bool {
+		// This will return errors until it connects to the agent,
+		// they're mostly noise because until the agent starts running
+		// we will get connection errors. If the test fails
+		// the agent logs will be present in the error message
+		// which should help to explain why the agent was not
+		// healthy.
+		err := agentFixture.IsHealthy(ctx)
+		return err == nil
+	},
+		2*time.Minute, time.Second,
+		"Elastic-Agent did not report healthy. Agent status error: \"%v\", Agent logs\n%s",
+		err, agentOutput,
+	)
+
+	// The default behaviour is to log events to the events log file
+	// so ensure this is happening
+	requireEventLogFileExistsWithData(t, agentFixture)
+
+	// Add a policy overwrite to change the events output to stderr
+	addOverwriteToPolicy(t, info, policyName, policyID)
+
+	// Ensure Elastic-Agent is healthy after the policy change
+	assert.Eventuallyf(t, func() bool {
+		// This will return errors until it connects to the agent,
+		// they're mostly noise because until the agent starts running
+		// we will get connection errors. If the test fails
+		// the agent logs will be present in the error message
+		// which should help to explain why the agent was not
+		// healthy.
+		err := agentFixture.IsHealthy(ctx)
+		return err == nil
+	},
+		2*time.Minute, time.Second,
+		"Elastic-Agent did not report healthy after policy change. Agent status error: \"%v\", Agent logs\n%s",
+		err, agentOutput,
+	)
+
+	// Ensure the events logs are going to stderr
+	assert.Eventually(t, func() bool {
+		agentOutputStr := agentOutput.String()
+		scanner := bufio.NewScanner(strings.NewReader(agentOutputStr))
+		for scanner.Scan() {
+			if strings.Contains(scanner.Text(), "Cannot index event") {
+				return true
+			}
+		}
+
+		return false
+	}, 3*time.Minute, 10*time.Second, "cannot find events on stderr")
+}
+
+func addOverwriteToPolicy(t *testing.T, info *define.Info, policyName, policyID string) {
+	addLoggingOverwriteBody := fmt.Sprintf(`
+{
+  "name": "%s",
+  "namespace": "default",
+  "overrides": {
+    "agent": {
+      "logging": {
+        "event_data": {
+          "to_stderr": true,
+          "to_files": false
+        }
+      }
+    }
+  }
+}
+`, policyName)
+	resp, err := info.KibanaClient.Send(
+		http.MethodPut,
+		fmt.Sprintf("/api/fleet/agent_policies/%s", policyID),
+		nil,
+		nil,
+		bytes.NewBufferString(addLoggingOverwriteBody),
+	)
+	if err != nil {
+		t.Fatalf("could not execute request to Kibana/Fleet: %s", err)
+	}
+	if resp.StatusCode != http.StatusOK {
+		// On error dump the whole request response so we can easily spot
+		// what went wrong.
+		t.Errorf("received a non 200-OK when adding overwrite to policy. "+
+			"Status code: %d", resp.StatusCode)
+		respDump, err := httputil.DumpResponse(resp, true)
+		if err != nil {
+			t.Fatalf("could not dump error response from Kibana: %s", err)
+		}
+		// Make debugging as easy as possible
+		t.Log("================================================================================")
+		t.Log("Kibana error response:")
+		t.Log(string(respDump))
+		t.FailNow()
+	}
+}
+
+func requireEventLogFileExistsWithData(t *testing.T, agentFixture *atesting.Fixture) {
+	// Now the Elastic-Agent is running, so validate the Event log file.
+	// Because the path changes based on the Elastic-Agent version, we
+	// use glob to find the file
+	var logFileName string
+	require.Eventually(t, func() bool {
+		// We ignore this error because the folder might not be there.
+		// Once the folder and file are there, then this call should succeed
+		// and we can read the file.
+		glob := filepath.Join(
+			agentFixture.WorkDir(),
+			"data", "elastic-agent-*", "logs", "events", "*")
+		files, err := filepath.Glob(glob)
+		if err != nil {
+			t.Fatalf("could not scan for the events log file: %s", err)
+		}
+
+		if len(files) == 1 {
+			logFileName = files[0]
+			return true
+		}
+
+		return false
+	}, time.Minute, time.Second, "could not find event log file")
+
+	logEntryBytes, err := os.ReadFile(logFileName)
+	if err != nil {
+		t.Fatalf("cannot read file '%s': %s", logFileName, err)
+	}
+
+	logEntry := string(logEntryBytes)
+	expectedStr := "Cannot index event"
+	if !strings.Contains(logEntry, expectedStr) {
+		t.Errorf(
+			"did not find the expected log entry ('%s') in the events log file",
+			expectedStr)
+		t.Log("Event log file contents:")
+		t.Log(logEntry)
+	}
+}
+
+func collectDiagnosticsAndVerifyLogs(
+	t *testing.T,
+	ctx context.Context,
+	agentFixture *atesting.Fixture,
+	cmd,
+	expectedFiles []string) {
+
+	diagPath, err := agentFixture.ExecDiagnostics(ctx, cmd...)
+	if err != nil {
+		t.Fatalf("could not run the diagnostics command: %s", err)
+	}
+
+	extractionDir := t.TempDir()
+	extractZipArchive(t, diagPath, extractionDir)
+	diagLogFiles, diagEventLogFiles := getLogFilenames(
+		t,
+		filepath.Join(extractionDir, "logs", "elastic-agent*"))
+	allLogs := append(diagLogFiles, diagEventLogFiles...)
+
+	require.ElementsMatch(
+		t,
+		expectedFiles,
+		allLogs,
+		"log files in the diagnostics archive do not match the expected log files")
+}
+
+func getLogFilenames(
+	t *testing.T,
+	basepath string,
+) (logFiles, eventLogFiles []string) {
+
+	logFilesGlob := filepath.Join(basepath, "*.ndjson")
+	logFilesPath, err := filepath.Glob(logFilesGlob)
+	if err != nil {
+		t.Fatalf("could not get log file names: %s", err)
+	}
+
+	for _, f := range logFilesPath {
+		logFiles = append(logFiles, filepath.Base(f))
+	}
+
+	eventLogFilesGlob := filepath.Join(basepath, "events", "*.ndjson")
+	eventLogFilesPath, err := filepath.Glob(eventLogFilesGlob)
+	if err != nil {
+		t.Fatalf("could not get event log file names: %s", err)
+	}
+
+	for _, f := range eventLogFilesPath {
+		eventLogFiles = append(eventLogFiles, filepath.Base(f))
+	}
+
+	return logFiles, eventLogFiles
+}
diff --git a/x-pack/agentbeat/testing/integration/fake_test.go b/x-pack/agentbeat/testing/integration/fake_test.go
new file mode 100644
index 000000000000..1fa49f3e42b4
--- /dev/null
+++ b/x-pack/agentbeat/testing/integration/fake_test.go
@@ -0,0 +1,179 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License 2.0;
+// you may not use this file except in compliance with the Elastic License 2.0.
+
+//go:build integration
+
+package integration
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/elastic/elastic-agent/pkg/control/v2/client"
+	atesting "github.com/elastic/elastic-agent/pkg/testing"
+	"github.com/elastic/elastic-agent/pkg/testing/define"
+	"github.com/elastic/elastic-agent/pkg/testing/tools/testcontext"
+)
+
+var simpleConfig1 = `
+outputs:
+  default:
+    type: fake-output
+inputs:
+  - id: fake
+    type: fake
+    state: 1
+    message: Configuring
+`
+
+var simpleConfig2 = `
+outputs:
+  default:
+    type: fake-output
+inputs:
+  - id: fake
+    type: fake
+    state: 2
+    message: Healthy
+`
+
+var simpleIsolatedUnitsConfig = `
+outputs:
+  default:
+    type: fake-output
+inputs:
+  - id: fake-isolated-units
+    type: fake-isolated-units
+    state: 1
+    message: Configuring
+`
+
+var complexIsolatedUnitsConfig = `
+outputs:
+  default:
+    type: fake-output
+inputs:
+  - id: fake-isolated-units
+    type: fake-isolated-units
+    state: 2
+    message: Healthy
+  - id: fake-isolated-units-1
+    type: fake-isolated-units
+    state: 2
+    message: Healthy
+`
+
+func TestFakeComponent(t *testing.T) {
+	define.Require(t, define.Requirements{
+		Group: Default,
+		Local: true,
+	})
+
+	f, err := define.NewFixtureFromLocalBuild(t, define.Version())
+	require.NoError(t, err)
+
+	ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute))
+	defer cancel()
+	err = f.Prepare(ctx, fakeComponent)
+	require.NoError(t, err)
+
+	err = f.Run(ctx, atesting.State{
+		Configure:  simpleConfig1,
+		AgentState: atesting.NewClientState(client.Healthy),
+		Components: map[string]atesting.ComponentState{
+			"fake-default": {
+				State: atesting.NewClientState(client.Healthy),
+				Units: map[atesting.ComponentUnitKey]atesting.ComponentUnitState{
+					atesting.ComponentUnitKey{UnitType: client.UnitTypeOutput, UnitID:
"fake-default"}: { + State: atesting.NewClientState(client.Healthy), + }, + atesting.ComponentUnitKey{UnitType: client.UnitTypeInput, UnitID: "fake-default-fake"}: { + State: atesting.NewClientState(client.Configuring), + }, + }, + }, + }, + }, atesting.State{ + Configure: simpleConfig2, + AgentState: atesting.NewClientState(client.Healthy), + StrictComponents: map[string]atesting.ComponentState{ + "fake-default": { + State: atesting.NewClientState(client.Healthy), + Units: map[atesting.ComponentUnitKey]atesting.ComponentUnitState{ + atesting.ComponentUnitKey{UnitType: client.UnitTypeOutput, UnitID: "fake-default"}: { + State: atesting.NewClientState(client.Healthy), + }, + atesting.ComponentUnitKey{UnitType: client.UnitTypeInput, UnitID: "fake-default-fake"}: { + State: atesting.NewClientState(client.Healthy), + }, + }, + }, + }, + }) + require.NoError(t, err) +} + +func TestFakeIsolatedUnitsComponent(t *testing.T) { + define.Require(t, define.Requirements{ + Group: Default, + Local: true, + }) + + f, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + err = f.Prepare(ctx, fakeComponent) + require.NoError(t, err) + + err = f.Run(ctx, atesting.State{ + Configure: simpleIsolatedUnitsConfig, + AgentState: atesting.NewClientState(client.Healthy), + Components: map[string]atesting.ComponentState{ + "fake-isolated-units-default-fake-isolated-units": { + State: atesting.NewClientState(client.Healthy), + Units: map[atesting.ComponentUnitKey]atesting.ComponentUnitState{ + atesting.ComponentUnitKey{UnitType: client.UnitTypeOutput, UnitID: "fake-isolated-units-default-fake-isolated-units"}: { + State: atesting.NewClientState(client.Healthy), + }, + atesting.ComponentUnitKey{UnitType: client.UnitTypeInput, UnitID: "fake-isolated-units-default-fake-isolated-units-unit"}: { + State: atesting.NewClientState(client.Configuring), + }, + }, + }, + }, + }, atesting.State{ + Configure: complexIsolatedUnitsConfig, + AgentState: atesting.NewClientState(client.Healthy), + Components: map[string]atesting.ComponentState{ + "fake-isolated-units-default-fake-isolated-units": { + State: atesting.NewClientState(client.Healthy), + Units: map[atesting.ComponentUnitKey]atesting.ComponentUnitState{ + atesting.ComponentUnitKey{UnitType: client.UnitTypeOutput, UnitID: "fake-isolated-units-default-fake-isolated-units"}: { + State: atesting.NewClientState(client.Healthy), + }, + atesting.ComponentUnitKey{UnitType: client.UnitTypeInput, UnitID: "fake-isolated-units-default-fake-isolated-units-unit"}: { + State: atesting.NewClientState(client.Healthy), + }, + }, + }, + "fake-isolated-units-default-fake-isolated-units-1": { + State: atesting.NewClientState(client.Healthy), + Units: map[atesting.ComponentUnitKey]atesting.ComponentUnitState{ + atesting.ComponentUnitKey{UnitType: client.UnitTypeOutput, UnitID: "fake-isolated-units-default-fake-isolated-units-1"}: { + State: atesting.NewClientState(client.Healthy), + }, + atesting.ComponentUnitKey{UnitType: client.UnitTypeInput, UnitID: "fake-isolated-units-default-fake-isolated-units-1-unit"}: { + State: atesting.NewClientState(client.Healthy), + }, + }, + }, + }, + }) + require.NoError(t, err) +} diff --git a/x-pack/agentbeat/testing/integration/fakes.go b/x-pack/agentbeat/testing/integration/fakes.go new file mode 100644 index 000000000000..8df18085dc3d --- /dev/null +++ b/x-pack/agentbeat/testing/integration/fakes.go @@ -0,0 
+1,85 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +//go:build integration + +package integration + +import ( + "path/filepath" + "runtime" + + "github.com/elastic/elastic-agent/pkg/component" + atesting "github.com/elastic/elastic-agent/pkg/testing" +) + +const fakeOutputName = "fake-output" + +var fakeComponentPltfs = []string{ + "container/amd64", + "container/arm64", + "darwin/amd64", + "darwin/arm64", + "linux/amd64", + "linux/arm64", + "windows/amd64", +} + +var fakeComponent = atesting.UsableComponent{ + Name: "fake", + BinaryPath: mustAbs(filepath.Join("..", "..", "pkg", "component", "fake", "component", osExt("component"))), + Spec: &component.Spec{ + Version: 2, + Inputs: []component.InputSpec{ + { + Name: "fake", + Description: "A fake input", + Platforms: fakeComponentPltfs, + Outputs: []string{fakeOutputName}, + Command: &component.CommandSpec{}, + }, + { + Name: "fake-apm", + Description: "Fake component apm traces generator", + Platforms: fakeComponentPltfs, + Outputs: []string{fakeOutputName}, + Command: &component.CommandSpec{ + Env: []component.CommandEnvSpec{ + { + Name: "ELASTIC_APM_LOG_FILE", + Value: "stderr", + }, + { + Name: "ELASTIC_APM_LOG_LEVEL", + Value: "debug", + }, + }, + }, + }, + { + Name: "fake-isolated-units", + Description: "A fake isolated units input", + Platforms: fakeComponentPltfs, + Outputs: []string{fakeOutputName}, + Command: &component.CommandSpec{}, + IsolateUnits: true, + }, + }, + }, +} + +func mustAbs(path string) string { + abs, err := filepath.Abs(path) + if err != nil { + panic(err) + } + return abs +} + +func osExt(name string) string { + if runtime.GOOS == "windows" { + return name + ".exe" + } + return name +} diff --git a/x-pack/agentbeat/testing/integration/fleet-server.json b/x-pack/agentbeat/testing/integration/fleet-server.json new file mode 100644 index 000000000000..1daaa97c8dd1 --- /dev/null +++ b/x-pack/agentbeat/testing/integration/fleet-server.json @@ -0,0 +1,38 @@ +{ + "id": "3434b864-d135-4d03-a944-29ee7ad61ddd", + "version": "WzMwNywxXQ==", + "name": "fleet_server-1", + "namespace": "", + "description": "", + "package": { + "name": "fleet_server", + "title": "Fleet Server", + "version": "1.5.0" + }, + "enabled": true, + "inputs": [ + { + "type": "fleet-server", + "policy_template": "fleet_server", + "enabled": true, + "streams": [], + "vars": { + "max_agents": { + "type": "integer" + }, + "max_connections": { + "type": "integer" + }, + "custom": { + "value": "", + "type": "yaml" + } + } + } + ], + "revision": 1, + "created_at": "2024-05-22T16:13:09.177Z", + "created_by": "system", + "updated_at": "2024-05-22T16:13:09.177Z", + "updated_by": "system" +} diff --git a/x-pack/agentbeat/testing/integration/fleetserver_test.go b/x-pack/agentbeat/testing/integration/fleetserver_test.go new file mode 100644 index 000000000000..51d4561b403b --- /dev/null +++ b/x-pack/agentbeat/testing/integration/fleetserver_test.go @@ -0,0 +1,168 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
+
+//go:build integration
+
+package integration
+
+import (
+	"context"
+	"crypto/tls"
+	"net/http"
+	"net/url"
+	"os"
+	"path/filepath"
+	"runtime"
+	"testing"
+	"time"
+
+	"github.com/gofrs/uuid/v5"
+	"github.com/stretchr/testify/require"
+
+	"github.com/elastic/elastic-agent-libs/kibana"
+	atesting "github.com/elastic/elastic-agent/pkg/testing"
+	"github.com/elastic/elastic-agent/pkg/testing/define"
+	"github.com/elastic/elastic-agent/pkg/testing/tools"
+	"github.com/elastic/elastic-agent/pkg/testing/tools/estools"
+	"github.com/elastic/elastic-agent/pkg/testing/tools/testcontext"
+)
+
+func fleetPolicy() kibana.AgentPolicy {
+	policyUUID := uuid.Must(uuid.NewV4()).String()
+
+	return kibana.AgentPolicy{
+		ID:          "test-fleet-policy-" + policyUUID,
+		Name:        "test-fleet-policy-" + policyUUID,
+		Namespace:   "default",
+		Description: "Test fleet policy " + policyUUID,
+	}
+}
+
+func TestInstallFleetServerBootstrap(t *testing.T) {
+	info := define.Require(t, define.Requirements{
+		Group: FleetPrivileged,
+		Stack: &define.Stack{},
+		Sudo:  true,
+		Local: false,
+	})
+
+	t.Skip("Skip until the first 8.16.0-SNAPSHOT is available")
+
+	ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute))
+	defer cancel()
+
+	// Get path to Elastic Agent executable
+	fixture, err := define.NewFixtureFromLocalBuild(t, define.Version(), atesting.WithAdditionalArgs([]string{"-E", "output.elasticsearch.allow_older_versions=true"}))
+	require.NoError(t, err)
+	err = fixture.Prepare(ctx)
+	require.NoError(t, err)
+
+	t.Log("Ensure base path is clean")
+	var defaultBasePath string
+	switch runtime.GOOS {
+	case "darwin":
+		defaultBasePath = `/Library`
+	case "linux":
+		defaultBasePath = `/opt`
+	case "windows":
+		defaultBasePath = `C:\Program Files`
+	}
+
+	topPath := filepath.Join(defaultBasePath, "Elastic", "Agent")
+	err = os.RemoveAll(topPath)
+	require.NoErrorf(t, err, "failed to remove %q. The test requires this path not to exist.", topPath)
+
+	t.Log("Create fleet-server policy...")
+	policyResp, err := info.KibanaClient.CreatePolicy(ctx, fleetPolicy())
+	require.NoError(t, err, "failed creating policy")
+	policy := policyResp.AgentPolicy
+	_, err = tools.InstallPackageFromDefaultFile(ctx, info.KibanaClient, "fleet-server", "1.5.0", "fleet-server.json", uuid.Must(uuid.NewV4()).String(), policy.ID)
+	require.NoError(t, err, "failed creating fleet-server integration")
+
+	t.Log("Get fleet-server service token...")
+	serviceToken, err := estools.CreateServiceToken(ctx, info.ESClient, "fleet-server")
+	require.NoError(t, err, "failed creating service token")
+
+	esHost, ok := os.LookupEnv("ELASTICSEARCH_HOST")
+	require.True(t, ok, "environment var ELASTICSEARCH_HOST is empty")
+	u, err := url.Parse(esHost)
+	require.NoErrorf(t, err, "could not parse %q as a URL", esHost)
+	if u.Port() == "" {
+		switch u.Scheme {
+		case "":
+			u.Host += ":80"
+			u.Scheme = "http"
+		case "http":
+			u.Host += ":80"
+		case "https":
+			u.Host += ":443"
+		default:
+			require.Failf(t, "unknown scheme", "elasticsearch host has unknown scheme: %s", u.Scheme)
+		}
+		esHost = u.String()
+	}
+
+	t.Logf("fleet-server will enroll with es host: %q", esHost)
+
+	// Run `elastic-agent install` with fleet-server bootstrap options.
+	// We use `--force` to prevent interactive execution.
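+	// Rough mapping of the FleetBootstrapOpts fields to the CLI flags they
+	// are assumed to drive:
+	//   ESHost       -> --fleet-server-es
+	//   ServiceToken -> --fleet-server-service-token
+	//   Policy       -> --fleet-server-policy
+	//   Port         -> --fleet-server-port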
+ opts := &atesting.InstallOpts{ + Force: true, + Privileged: true, + FleetBootstrapOpts: atesting.FleetBootstrapOpts{ + ESHost: esHost, + ServiceToken: serviceToken, + Policy: policy.ID, + Port: 8220, + }, + } + out, err := fixture.Install(ctx, opts) + if err != nil { + t.Logf("Install output: %s", out) + require.NoError(t, err, "unable to install elastic-agent with fleet-server bootstrap options") + } + + // checkInstallSuccess(t, fixture, topPath, true) // FIXME fails to build if this is uncommented, but the method is part of install_test.go + t.Run("check agent package version", testAgentPackageVersion(ctx, fixture, true)) + + // elastic-agent will self sign a cert to use with fleet-server if one is not passed + // in order to interact with the API we need to ignore the cert. + client := &http.Client{ + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + }, + } + fleetOK := false + for i := 0; i < 10; i++ { + t.Log("Checking fleet-server status") + resp, err := client.Get("https://localhost:8220/api/status") + if err != nil { + t.Logf("fleet-server status check returned error: %v, retry in 10s...", err) + time.Sleep(10 * time.Second) + continue + } + if resp.StatusCode == http.StatusOK { + fleetOK = true + break + } + t.Logf("fleet-server status check returned incorrect status: %d, retry in 10s", resp.StatusCode) + time.Sleep(10 * time.Second) + continue + } + require.True(t, fleetOK, "expected fleet-server /api/status to return 200") + + // Make sure uninstall from within the topPath fails on Windows + if runtime.GOOS == "windows" { + cwd, err := os.Getwd() + require.NoErrorf(t, err, "GetWd failed: %s", err) + err = os.Chdir(topPath) + require.NoErrorf(t, err, "Chdir to topPath failed: %s", err) + t.Cleanup(func() { + _ = os.Chdir(cwd) + }) + out, err = fixture.Uninstall(ctx, &atesting.UninstallOpts{Force: true}) + require.Error(t, err, "uninstall should have failed") + require.Containsf(t, string(out), "uninstall must be run from outside the installed path", "expected error string not found in: %s err: %s", out, err) + } +} diff --git a/x-pack/agentbeat/testing/integration/fqdn_test.go b/x-pack/agentbeat/testing/integration/fqdn_test.go new file mode 100644 index 000000000000..d57f11fd53e7 --- /dev/null +++ b/x-pack/agentbeat/testing/integration/fqdn_test.go @@ -0,0 +1,334 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
+ +//go:build integration + +package integration + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/elastic/elastic-agent-libs/kibana" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools" + "github.com/elastic/elastic-agent/pkg/testing/tools/fleettools" + "github.com/elastic/elastic-agent/pkg/testing/tools/testcontext" + "github.com/elastic/go-elasticsearch/v8" +) + +func TestFQDN(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: FQDN, + OS: []define.OS{ + {Type: define.Linux}, + }, + Stack: &define.Stack{}, + Local: false, + Sudo: true, + }) + + agentFixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + externalIP, err := getExternalIP() + require.NoError(t, err) + + // Save original /etc/hosts so we can restore it at the end of each test + origEtcHosts, err := getEtcHosts() + require.NoError(t, err) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + // Save original hostname so we can restore it at the end of each test + origHostname, err := getHostname(ctx) + require.NoError(t, err) + + kibClient := info.KibanaClient + + shortName := strings.ToLower(randStr(6)) + fqdn := shortName + ".baz.io" + t.Logf("Set FQDN on host to %s", fqdn) + err = setHostFQDN(ctx, origEtcHosts, externalIP, fqdn, t.Log) + require.NoError(t, err) + + t.Log("Enroll agent in Fleet with a test policy") + createPolicyReq := kibana.AgentPolicy{ + Name: "test-policy-fqdn-" + strings.ReplaceAll(fqdn, ".", "-"), + Namespace: info.Namespace, + Description: fmt.Sprintf("Test policy for FQDN E2E test (%s)", fqdn), + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + AgentFeatures: []map[string]interface{}{ + { + "name": "fqdn", + "enabled": false, + }, + }, + } + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Force: true, + } + policy, err := tools.InstallAgentWithPolicy(ctx, t, installOpts, agentFixture, kibClient, createPolicyReq) + require.NoError(t, err) + + t.Cleanup(func() { + // Use a separate context as the one in the test body will have been cancelled at this point. 
+ cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), time.Minute) + defer cleanupCancel() + + t.Log("Un-enrolling Elastic Agent...") + assert.NoError(t, fleettools.UnEnrollAgent(cleanupCtx, info.KibanaClient, policy.ID)) + + t.Log("Restoring hostname...") + err := setHostname(cleanupCtx, origHostname, t.Log) + require.NoError(t, err) + + t.Log("Restoring original /etc/hosts...") + err = setEtcHosts(origEtcHosts) + require.NoError(t, err) + }) + + t.Log("Verify that agent name is short hostname") + agent := verifyAgentName(ctx, t, policy.ID, shortName, info.KibanaClient) + + t.Log("Verify that hostname in `logs-*` and `metrics-*` is short hostname") + verifyHostNameInIndices(t, "logs-*", shortName, info.Namespace, info.ESClient) + verifyHostNameInIndices(t, "metrics-*", shortName, info.Namespace, info.ESClient) + + t.Log("Update Agent policy to enable FQDN") + policy.AgentFeatures = []map[string]interface{}{ + { + "name": "fqdn", + "enabled": true, + }, + } + updatePolicyReq := kibana.AgentPolicyUpdateRequest{ + Name: policy.Name, + Namespace: info.Namespace, + AgentFeatures: policy.AgentFeatures, + } + _, err = kibClient.UpdatePolicy(ctx, policy.ID, updatePolicyReq) + require.NoError(t, err) + + t.Log("Wait until policy has been applied by Agent") + expectedAgentPolicyRevision := agent.PolicyRevision + 1 + require.Eventually( + t, + tools.IsPolicyRevision(ctx, t, kibClient, agent.ID, expectedAgentPolicyRevision), + 2*time.Minute, + 1*time.Second, + ) + + t.Log("Verify that agent name is FQDN") + verifyAgentName(ctx, t, policy.ID, fqdn, info.KibanaClient) + + t.Log("Verify that hostname in `logs-*` and `metrics-*` is FQDN") + verifyHostNameInIndices(t, "logs-*", fqdn, info.Namespace, info.ESClient) + verifyHostNameInIndices(t, "metrics-*", fqdn, info.Namespace, info.ESClient) + + t.Log("Update Agent policy to disable FQDN") + policy.AgentFeatures = []map[string]interface{}{ + { + "name": "fqdn", + "enabled": false, + }, + } + updatePolicyReq = kibana.AgentPolicyUpdateRequest{ + Name: policy.Name, + Namespace: info.Namespace, + AgentFeatures: policy.AgentFeatures, + } + _, err = kibClient.UpdatePolicy(ctx, policy.ID, updatePolicyReq) + require.NoError(t, err) + + t.Log("Wait until policy has been applied by Agent") + expectedAgentPolicyRevision++ + require.Eventually( + t, + tools.IsPolicyRevision(ctx, t, kibClient, agent.ID, expectedAgentPolicyRevision), + 2*time.Minute, + 1*time.Second, + ) + + t.Log("Verify that agent name is short hostname again") + verifyAgentName(ctx, t, policy.ID, shortName, info.KibanaClient) + + // TODO: Re-enable assertion once https://github.com/elastic/elastic-agent/issues/3078 is + // investigated for root cause and resolved. 
+	// t.Log("Verify that hostname in `logs-*` and `metrics-*` is short hostname again")
+	// verifyHostNameInIndices(t, "logs-*", shortName, info.ESClient)
+	// verifyHostNameInIndices(t, "metrics-*", shortName, info.ESClient)
+}
+
+func verifyAgentName(ctx context.Context, t *testing.T, policyID, hostname string, kibClient *kibana.Client) *kibana.AgentExisting {
+	t.Helper()
+
+	var agent *kibana.AgentExisting
+	var err error
+
+	require.Eventually(
+		t,
+		func() bool {
+			agent, err = fleettools.GetAgentByPolicyIDAndHostnameFromList(ctx, kibClient, policyID, hostname)
+			return err == nil && agent != nil
+		},
+		5*time.Minute,
+		5*time.Second,
+	)
+
+	return agent
+}
+
+func verifyHostNameInIndices(t *testing.T, indices, hostname, namespace string, esClient *elasticsearch.Client) {
+	queryRaw := map[string]interface{}{
+		"query": map[string]interface{}{
+			"bool": map[string]interface{}{
+				"must": []map[string]interface{}{
+					{
+						"term": map[string]interface{}{
+							"host.name": map[string]interface{}{
+								"value": hostname,
+							},
+						},
+					},
+					{
+						"term": map[string]interface{}{
+							"data_stream.namespace": map[string]interface{}{
+								"value": namespace,
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	queryJSON, err := json.Marshal(queryRaw)
+	require.NoError(t, err)
+
+	search := esClient.Search
+
+	require.Eventually(
+		t,
+		func() bool {
+			// Use a fresh reader on every attempt: a shared bytes.Buffer would
+			// be drained by the first request, leaving retries with an empty body.
+			resp, err := search(
+				search.WithIndex(indices),
+				search.WithSort("@timestamp:desc"),
+				search.WithFilterPath("hits.hits"),
+				search.WithSize(1),
+				search.WithBody(bytes.NewReader(queryJSON)),
+			)
+			require.NoError(t, err)
+			require.False(t, resp.IsError())
+			defer resp.Body.Close()
+
+			var body struct {
+				Hits struct {
+					Hits []struct {
+						Source struct {
+							Host struct {
+								Name string `json:"name"`
+							} `json:"host"`
+						} `json:"_source"`
+					} `json:"hits"`
+				} `json:"hits"`
+			}
+			decoder := json.NewDecoder(resp.Body)
+			err = decoder.Decode(&body)
+			require.NoError(t, err)
+
+			return len(body.Hits.Hits) == 1
+		},
+		2*time.Minute,
+		5*time.Second,
+	)
+}
+
+func getHostname(ctx context.Context) (string, error) {
+	cmd := exec.CommandContext(ctx, "hostname")
+	out, err := cmd.Output()
+	if err != nil {
+		return "", err
+	}
+
+	return strings.TrimSpace(string(out)), nil
+}
+
+func getEtcHosts() ([]byte, error) {
+	filename := string(filepath.Separator) + filepath.Join("etc", "hosts")
+	return os.ReadFile(filename)
+}
+
+func setHostFQDN(ctx context.Context, etcHosts []byte, externalIP, fqdn string, log func(args ...any)) error {
+	filename := string(filepath.Separator) + filepath.Join("etc", "hosts")
+
+	// Add entry for FQDN in /etc/hosts
+	parts := strings.Split(fqdn, ".")
+	shortName := parts[0]
+	line := fmt.Sprintf("%s\t%s %s\n", externalIP, fqdn, shortName)
+
+	etcHosts = append(etcHosts, []byte(line)...)
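+	// hosts(5) treats the first name after the address as the canonical
+	// hostname, so the FQDN is placed before the short-name alias; resolving
+	// the short hostname set below then yields the FQDN.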
+	err := os.WriteFile(filename, etcHosts, 0o644)
+	if err != nil {
+		return err
+	}
+
+	// Set the short name as the hostname; the FQDN is resolved through the
+	// /etc/hosts entry written above.
+	cmd := exec.CommandContext(ctx, "hostname", shortName)
+	output, err := cmd.Output()
+	if err != nil {
+		log(string(output))
+	}
+
+	return err
+}
+
+func setEtcHosts(data []byte) error {
+	filename := string(filepath.Separator) + filepath.Join("etc", "hosts")
+	return os.WriteFile(filename, data, 0o644)
+}
+
+func setHostname(ctx context.Context, hostname string, log func(args ...any)) error {
+	cmd := exec.CommandContext(ctx, "hostname", hostname)
+	output, err := cmd.Output()
+	if err != nil {
+		log(string(output))
+	}
+	return err
+}
+
+func getExternalIP() (string, error) {
+	resp, err := http.Get("https://api.ipify.org")
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", err
+	}
+
+	return strings.TrimSpace(string(body)), nil
+}
diff --git a/x-pack/agentbeat/testing/integration/groups_test.go b/x-pack/agentbeat/testing/integration/groups_test.go
new file mode 100644
index 000000000000..0440f49f0ffb
--- /dev/null
+++ b/x-pack/agentbeat/testing/integration/groups_test.go
@@ -0,0 +1,43 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License 2.0;
+// you may not use this file except in compliance with the Elastic License 2.0.
+
+//go:build integration
+
+package integration
+
+import "github.com/elastic/elastic-agent/pkg/testing/define"
+
+const (
+	// Default group.
+	Default = define.Default
+
+	// Fleet group of tests. Used for testing Elastic Agent with Fleet.
+	Fleet = "fleet"
+
+	// FleetPrivileged group of tests. Used for testing Elastic Agent with Fleet installed privileged.
+	FleetPrivileged = "fleet-privileged"
+
+	// FleetAirgapped group of tests. Used for testing Elastic Agent with Fleet and airgapped.
+	FleetAirgapped = "fleet-airgapped"
+
+	// FleetAirgappedPrivileged group of tests. Used for testing Elastic Agent with Fleet installed
+	// privileged and airgapped.
+	FleetAirgappedPrivileged = "fleet-airgapped-privileged"
+
+	// FleetUpgradeToPRBuild group of tests. Used for testing Elastic Agent
+	// upgrading to a build built from the PR being tested.
+	FleetUpgradeToPRBuild = "fleet-upgrade-to-pr-build"
+
+	// FQDN group of tests. Used for testing Elastic Agent with FQDN enabled.
+	FQDN = "fqdn"
+
+	// Upgrade group of tests. Used for testing upgrades.
+	Upgrade = "upgrade"
+
+	// Deb group of tests. Used for testing .deb packages install & upgrades
+	Deb = "deb"
+
+	// RPM group of tests. Used for testing .rpm packages install & upgrades
+	RPM = "rpm"
+)
diff --git a/x-pack/agentbeat/testing/integration/inspect_test.go b/x-pack/agentbeat/testing/integration/inspect_test.go
new file mode 100644
index 000000000000..74866ff8460b
--- /dev/null
+++ b/x-pack/agentbeat/testing/integration/inspect_test.go
@@ -0,0 +1,91 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License 2.0;
+// you may not use this file except in compliance with the Elastic License 2.0.
+ +//go:build integration + +package integration + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "gopkg.in/yaml.v2" + + integrationtest "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools/check" + "github.com/elastic/elastic-agent/pkg/testing/tools/testcontext" + "github.com/elastic/elastic-agent/testing/fleetservertest" +) + +func TestInspect(t *testing.T) { + _ = define.Require(t, define.Requirements{ + Group: Fleet, + Local: false, + Sudo: true, + }) + + ctx, cancel := testcontext.WithTimeout(t, context.Background(), time.Minute*10) + defer cancel() + + apiKey, policy := createBasicFleetPolicyData(t, "http://fleet-server:8220") + checkinWithAcker := fleetservertest.NewCheckinActionsWithAcker() + fleet := fleetservertest.NewServerWithHandlers( + apiKey, + "enrollmentToken", + policy.AgentID, + policy.PolicyID, + checkinWithAcker.ActionsGenerator(), + checkinWithAcker.Acker(), + fleetservertest.WithRequestLog(t.Logf), + ) + defer fleet.Close() + policyChangeAction, err := fleetservertest.NewActionPolicyChangeWithFakeComponent("test-policy-change", fleetservertest.TmplPolicy{ + AgentID: policy.AgentID, + PolicyID: policy.PolicyID, + FleetHosts: []string{fleet.LocalhostURL}, + }) + require.NoError(t, err) + checkinWithAcker.AddCheckin("token", 0, policyChangeAction) + + fixture, err := define.NewFixtureFromLocalBuild(t, + define.Version(), + integrationtest.WithAllowErrors(), + integrationtest.WithLogOutput()) + require.NoError(t, err, "SetupTest: NewFixtureFromLocalBuild failed") + err = fixture.EnsurePrepared(ctx) + require.NoError(t, err, "SetupTest: fixture.Prepare failed") + + out, err := fixture.Install( + ctx, + &integrationtest.InstallOpts{ + Force: true, + NonInteractive: true, + Insecure: true, + Privileged: false, + EnrollOpts: integrationtest.EnrollOpts{ + URL: fleet.LocalhostURL, + EnrollmentToken: "anythingWillDO", + }}) + require.NoErrorf(t, err, "Error when installing agent, output: %s", out) + check.ConnectedToFleet(ctx, t, fixture, 5*time.Minute) + + p, err := fixture.Exec(ctx, []string{"inspect"}) + require.NoErrorf(t, err, "Error when running inspect, output: %s", p) + // Unmarshal into minimal object just to check if a secret has been redacted. + var yObj struct { + SecretPaths []string `yaml:"secret_paths"` + Inputs []struct { + SecretKey string `yaml:"secret_key"` + } `yaml:"inputs"` + } + err = yaml.Unmarshal(p, &yObj) + require.NoError(t, err) + assert.ElementsMatch(t, []string{"inputs.0.secret_key"}, yObj.SecretPaths) + require.Len(t, yObj.Inputs, 1) + assert.Equalf(t, "", yObj.Inputs[0].SecretKey, "inspect output: %s", p) +} diff --git a/x-pack/agentbeat/testing/integration/install_test.go b/x-pack/agentbeat/testing/integration/install_test.go new file mode 100644 index 000000000000..50bf0363b5b1 --- /dev/null +++ b/x-pack/agentbeat/testing/integration/install_test.go @@ -0,0 +1,420 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
+ +//go:build integration + +package integration + +import ( + "context" + "encoding/json" + "fmt" + "io" + "math/rand/v2" + "net/http" + "os" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools/fleettools" + "github.com/elastic/elastic-agent/pkg/testing/tools/testcontext" + "github.com/elastic/elastic-agent/testing/installtest" +) + +func TestInstallWithoutBasePath(t *testing.T) { + define.Require(t, define.Requirements{ + Group: Default, + // We require sudo for this test to run + // `elastic-agent install` (even though it will + // be installed as non-root). + Sudo: true, + + // It's not safe to run this test locally as it + // installs Elastic Agent. + Local: false, + }) + + // Get path to Elastic Agent executable + fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + // Prepare the Elastic Agent so the binary is extracted and ready to use. + err = fixture.Prepare(ctx) + require.NoError(t, err) + + // Run `elastic-agent install`. We use `--force` to prevent interactive + // execution. + opts := atesting.InstallOpts{Force: true, Privileged: false} + out, err := fixture.Install(ctx, &opts) + if err != nil { + t.Logf("install output: %s", out) + require.NoError(t, err) + } + + // Check that Agent was installed in default base path + topPath := installtest.DefaultTopPath() + require.NoError(t, installtest.CheckSuccess(ctx, fixture, topPath, &installtest.CheckOpts{Privileged: opts.Privileged})) + + t.Run("check agent package version", testAgentPackageVersion(ctx, fixture, true)) + t.Run("check second agent installs with --develop", testSecondAgentCanInstall(ctx, fixture, "", true, opts)) + + // Make sure uninstall from within the topPath fails on Windows + if runtime.GOOS == "windows" { + cwd, err := os.Getwd() + require.NoErrorf(t, err, "GetWd failed: %s", err) + err = os.Chdir(topPath) + require.NoErrorf(t, err, "Chdir to topPath failed: %s", err) + t.Cleanup(func() { + _ = os.Chdir(cwd) + }) + out, err = fixture.Uninstall(ctx, &atesting.UninstallOpts{Force: true}) + require.Error(t, err, "uninstall should have failed") + require.Containsf(t, string(out), "uninstall must be run from outside the installed path", "expected error string not found in: %s err: %s", out, err) + } +} + +func TestInstallWithBasePath(t *testing.T) { + define.Require(t, define.Requirements{ + Group: Default, + // We require sudo for this test to run + // `elastic-agent install` (even though it will + // be installed as non-root). + Sudo: true, + + // It's not safe to run this test locally as it + // installs Elastic Agent. + Local: false, + }) + + // Get path to Elastic Agent executable + fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + // Prepare the Elastic Agent so the binary is extracted and ready to use. 
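+	// (Prepare is assumed to extract the locally built agent package into the
+	// fixture's working directory so the install command can be run from it.)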
+ err = fixture.Prepare(ctx) + require.NoError(t, err) + + // When installing with unprivileged using a base path the + // base needs to be accessible by the `elastic-agent-user` user that will be + // executing the process, but is not created yet. Using a base that exists + // and is known to be accessible by standard users, ensures this tests + // works correctly and will not hit a permission issue when spawning the + // elastic-agent service. + var basePath string + switch runtime.GOOS { + case define.Linux: + basePath = `/usr` + case define.Windows: + basePath = `C:\` + default: + // Set up random temporary directory to serve as base path for Elastic Agent + // installation. + tmpDir := t.TempDir() + basePath = filepath.Join(tmpDir, strings.ToLower(randStr(8))) + } + + // Run `elastic-agent install`. We use `--force` to prevent interactive + // execution. + opts := atesting.InstallOpts{ + BasePath: basePath, + Force: true, + Privileged: false, + } + out, err := fixture.Install(ctx, &opts) + if err != nil { + t.Logf("install output: %s", out) + require.NoError(t, err) + } + + // Check that Agent was installed in the custom base path + topPath := filepath.Join(basePath, "Elastic", "Agent") + require.NoError(t, installtest.CheckSuccess(ctx, fixture, topPath, &installtest.CheckOpts{Privileged: opts.Privileged})) + + t.Run("check agent package version", testAgentPackageVersion(ctx, fixture, true)) + t.Run("check second agent installs with --namespace", testSecondAgentCanInstall(ctx, fixture, basePath, false, opts)) + + // Make sure uninstall from within the topPath fails on Windows + if runtime.GOOS == "windows" { + cwd, err := os.Getwd() + require.NoErrorf(t, err, "GetWd failed: %s", err) + err = os.Chdir(topPath) + require.NoErrorf(t, err, "Chdir to topPath failed: %s", err) + t.Cleanup(func() { + _ = os.Chdir(cwd) + }) + out, err = fixture.Uninstall(ctx, &atesting.UninstallOpts{Force: true}) + require.Error(t, err, "uninstall should have failed") + require.Containsf(t, string(out), "uninstall must be run from outside the installed path", "expected error string not found in: %s err: %s", out, err) + } +} + +func TestInstallPrivilegedWithoutBasePath(t *testing.T) { + define.Require(t, define.Requirements{ + Group: Default, + // We require sudo for this test to run + // `elastic-agent install`. + Sudo: true, + + // It's not safe to run this test locally as it + // installs Elastic Agent. + Local: false, + }) + + // Get path to Elastic Agent executable + fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + // Prepare the Elastic Agent so the binary is extracted and ready to use. + err = fixture.Prepare(ctx) + require.NoError(t, err) + + // Run `elastic-agent install`. We use `--force` to prevent interactive + // execution. 
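+	// Privileged: true is assumed to install the agent running as root,
+	// rather than as the dedicated `elastic-agent-user` account used by
+	// unprivileged installs; installtest.CheckOpts below asserts the same mode.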
+ opts := atesting.InstallOpts{Force: true, Privileged: true} + out, err := fixture.Install(ctx, &opts) + if err != nil { + t.Logf("install output: %s", out) + require.NoError(t, err) + } + + // Check that Agent was installed in default base path + require.NoError(t, installtest.CheckSuccess(ctx, fixture, opts.BasePath, &installtest.CheckOpts{Privileged: opts.Privileged})) + + t.Run("check agent package version", testAgentPackageVersion(ctx, fixture, true)) + t.Run("check second agent installs with --namespace", testSecondAgentCanInstall(ctx, fixture, "", false, opts)) +} + +func TestInstallPrivilegedWithBasePath(t *testing.T) { + define.Require(t, define.Requirements{ + Group: Default, + // We require sudo for this test to run + // `elastic-agent install`. + Sudo: true, + + // It's not safe to run this test locally as it + // installs Elastic Agent. + Local: false, + }) + + // Get path to Elastic Agent executable + fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + // Prepare the Elastic Agent so the binary is extracted and ready to use. + err = fixture.Prepare(ctx) + require.NoError(t, err) + + // Set up random temporary directory to serve as base path for Elastic Agent + // installation. + tmpDir := t.TempDir() + randomBasePath := filepath.Join(tmpDir, strings.ToLower(randStr(8))) + + // Run `elastic-agent install`. We use `--force` to prevent interactive + // execution. + opts := atesting.InstallOpts{ + BasePath: randomBasePath, + Force: true, + Privileged: true, + } + out, err := fixture.Install(ctx, &opts) + if err != nil { + t.Logf("install output: %s", out) + require.NoError(t, err) + } + + // Check that Agent was installed in the custom base path + topPath := filepath.Join(randomBasePath, "Elastic", "Agent") + require.NoError(t, installtest.CheckSuccess(ctx, fixture, topPath, &installtest.CheckOpts{Privileged: opts.Privileged})) + t.Run("check agent package version", testAgentPackageVersion(ctx, fixture, true)) + t.Run("check second agent installs with --develop", testSecondAgentCanInstall(ctx, fixture, randomBasePath, true, opts)) +} + +// Tests that a second agent can be installed in an isolated namespace, using either --develop or --namespace. +func testSecondAgentCanInstall(ctx context.Context, fixture *atesting.Fixture, basePath string, develop bool, installOpts atesting.InstallOpts) func(*testing.T) { + return func(t *testing.T) { + // Get path to Elastic Agent executable + devFixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + // Prepare the Elastic Agent so the binary is extracted and ready to use. + err = devFixture.Prepare(ctx) + require.NoError(t, err) + + // If development mode was requested, the namespace will be automatically set to Development after Install(). + // Otherwise, install into a test namespace. 
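+		// The namespace is assumed to isolate the second install by suffixing
+		// the install directory and service name (see
+		// paths.InstallDirNameForNamespace below), so both agents can coexist.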
+ installOpts.Develop = develop + if !installOpts.Develop { + installOpts.Namespace = "Testing" + } + + devOut, err := devFixture.Install(ctx, &installOpts) + if err != nil { + t.Logf("install output: %s", devOut) + require.NoError(t, err) + } + + topPath := installtest.NamespaceTopPath(installOpts.Namespace) + if basePath != "" { + topPath = filepath.Join(basePath, "Elastic", paths.InstallDirNameForNamespace(installOpts.Namespace)) + } + + require.NoError(t, installtest.CheckSuccess(ctx, fixture, topPath, &installtest.CheckOpts{ + Privileged: installOpts.Privileged, + Namespace: installOpts.Namespace, + })) + } +} + +// TestInstallUninstallAudit will test to make sure that a fleet-managed agent can use the audit/unenroll endpoint when uninstalling itself. +func TestInstallUninstallAudit(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Default, + Stack: &define.Stack{}, // needs a fleet-server. + Sudo: true, + Local: false, + }) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + policyResp, enrollmentTokenResp := createPolicyAndEnrollmentToken(ctx, t, info.KibanaClient, createBasicPolicy()) + t.Logf("Created policy %+v", policyResp.AgentPolicy) + + t.Log("Getting default Fleet Server URL...") + fleetServerURL, err := fleettools.DefaultURL(ctx, info.KibanaClient) + require.NoError(t, err, "failed getting Fleet Server URL") + + err = fixture.Prepare(ctx) + require.NoError(t, err) + // Run `elastic-agent install`. We use `--force` to prevent interactive + // execution. + opts := &atesting.InstallOpts{ + Force: true, + EnrollOpts: atesting.EnrollOpts{ + URL: fleetServerURL, + EnrollmentToken: enrollmentTokenResp.APIKey, + }, + } + out, err := fixture.Install(ctx, opts) + if err != nil { + t.Logf("install output: %s", out) + require.NoError(t, err) + } + + require.Eventuallyf(t, func() bool { + return waitForAgentAndFleetHealthy(ctx, t, fixture) + }, time.Minute, time.Second, "agent never became healthy or connected to Fleet") + + agentID, err := getAgentID(ctx, fixture) + require.NoError(t, err, "error getting the agent inspect output") + require.NotEmpty(t, agentID, "agent ID empty") + + out, err = fixture.Uninstall(ctx, &atesting.UninstallOpts{Force: true}) + if err != nil { + t.Logf("uninstall output: %s", out) + require.NoError(t, err) + } + + // TODO: replace direct query to ES index with API call to Fleet + // Blocked on https://github.com/elastic/kibana/issues/194884 + response, err := info.ESClient.Get(".fleet-agents", agentID, info.ESClient.Get.WithContext(ctx)) + require.NoError(t, err) + defer response.Body.Close() + p, err := io.ReadAll(response.Body) + require.NoError(t, err) + require.Equalf(t, http.StatusOK, response.StatusCode, "ES status code expected 200, body: %s", p) + var res struct { + Source struct { + AuditUnenrolledReason string `json:"audit_unenrolled_reason"` + } `json:"_source"` + } + err = json.Unmarshal(p, &res) + require.NoError(t, err) + require.Equal(t, "uninstall", res.Source.AuditUnenrolledReason) +} + +// TestRepeatedInstallUninstall will install then uninstall the agent +// repeatedly. This test exists because of a number of race +// conditions that have occurred in the uninstall process. Current +// testing shows each iteration takes around 16 seconds. 
+func TestRepeatedInstallUninstall(t *testing.T) { + define.Require(t, define.Requirements{ + Group: Default, + // We require sudo for this test to run + // `elastic-agent install` (even though it will + // be installed as non-root). + Sudo: true, + + // It's not safe to run this test locally as it + // installs Elastic Agent. + Local: false, + }) + + maxRunTime := 2 * time.Minute + iterations := 100 + for i := 0; i < iterations; i++ { + t.Run(fmt.Sprintf("%s-%d", t.Name(), i), func(t *testing.T) { + + // Get path to Elastic Agent executable + fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(maxRunTime)) + defer cancel() + + // Prepare the Elastic Agent so the binary is extracted and ready to use. + err = fixture.Prepare(ctx) + require.NoError(t, err) + + // Run `elastic-agent install`. We use `--force` to prevent interactive + // execution. + opts := &atesting.InstallOpts{Force: true} + out, err := fixture.Install(ctx, opts) + if err != nil { + t.Logf("install output: %s", out) + require.NoError(t, err) + } + + // Check that Agent was installed in default base path + require.NoError(t, installtest.CheckSuccess(ctx, fixture, opts.BasePath, &installtest.CheckOpts{Privileged: opts.Privileged})) + t.Run("check agent package version", testAgentPackageVersion(ctx, fixture, true)) + out, err = fixture.Uninstall(ctx, &atesting.UninstallOpts{Force: true}) + require.NoErrorf(t, err, "uninstall failed: %s", err) + }) + } +} + +func randStr(length int) string { + var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") + + runes := make([]rune, length) + for i := range runes { + runes[i] = letters[rand.IntN(len(letters))] + } + + return string(runes) +} diff --git a/x-pack/agentbeat/testing/integration/kubernetes_agent_service_test.go b/x-pack/agentbeat/testing/integration/kubernetes_agent_service_test.go new file mode 100644 index 000000000000..4a5ebdda2ad6 --- /dev/null +++ b/x-pack/agentbeat/testing/integration/kubernetes_agent_service_test.go @@ -0,0 +1,129 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
+
+//go:build integration
+
+package integration
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"crypto/sha256"
+	"encoding/base64"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	corev1 "k8s.io/api/core/v1"
+
+	"github.com/elastic/elastic-agent/pkg/testing/define"
+)
+
+func TestKubernetesAgentService(t *testing.T) {
+	info := define.Require(t, define.Requirements{
+		Stack: &define.Stack{},
+		Local: false,
+		Sudo: false,
+		OS: []define.OS{
+			// only test the service container
+			{Type: define.Kubernetes, DockerVariant: "service"},
+		},
+		Group: define.Kubernetes,
+	})
+
+	agentImage := os.Getenv("AGENT_IMAGE")
+	require.NotEmpty(t, agentImage, "AGENT_IMAGE must be set")
+
+	client, err := info.KubeClient()
+	require.NoError(t, err)
+	require.NotNil(t, client)
+
+	testLogsBasePath := os.Getenv("K8S_TESTS_POD_LOGS_BASE")
+	require.NotEmpty(t, testLogsBasePath, "K8S_TESTS_POD_LOGS_BASE must be set")
+
+	err = os.MkdirAll(filepath.Join(testLogsBasePath, t.Name()), 0755)
+	require.NoError(t, err, "failed to create test logs directory")
+
+	namespace := info.Namespace
+
+	esHost := os.Getenv("ELASTICSEARCH_HOST")
+	require.NotEmpty(t, esHost, "ELASTICSEARCH_HOST must be set")
+
+	esAPIKey, err := generateESAPIKey(info.ESClient, namespace)
+	require.NoError(t, err, "failed to generate ES API key")
+	require.NotEmpty(t, esAPIKey, "failed to generate ES API key")
+
+	renderedManifest, err := renderKustomize(agentK8SKustomize)
+	require.NoError(t, err, "failed to render kustomize")
+
+	hasher := sha256.New()
+	hasher.Write([]byte(t.Name()))
+	testNamespace := strings.ToLower(base64.URLEncoding.EncodeToString(hasher.Sum(nil)))
+	testNamespace = noSpecialCharsRegexp.ReplaceAllString(testNamespace, "")
+
+	k8sObjects, err := yamlToK8SObjects(bufio.NewReader(bytes.NewReader(renderedManifest)))
+	require.NoError(t, err, "failed to convert yaml to k8s objects")
+
+	adjustK8SAgentManifests(k8sObjects, testNamespace, "elastic-agent-standalone",
+		func(container *corev1.Container) {
+			// set agent image
+			container.Image = agentImage
+			// set ImagePullPolicy to "Never" to avoid pulling the image
+			// as the image is already loaded by the kubernetes provisioner
+			container.ImagePullPolicy = "Never"
+
+			// set Elasticsearch host and API key
+			for idx, env := range container.Env {
+				if env.Name == "ES_HOST" {
+					container.Env[idx].Value = esHost
+					container.Env[idx].ValueFrom = nil
+				}
+				if env.Name == "API_KEY" {
+					container.Env[idx].Value = esAPIKey
+					container.Env[idx].ValueFrom = nil
+				}
+			}
+
+			// the service container has a unique entrypoint and command because it's run in the cloud,
+			// so adjust the spec to run it correctly
+			container.Command = []string{"elastic-agent"}
+			container.Args = []string{"-c", "/etc/elastic-agent/agent.yml", "-e"}
+		},
+		func(pod *corev1.PodSpec) {
+			for volumeIdx, volume := range pod.Volumes {
+				// need to update the volume path of the state directory
+				// to match the test namespace
+				if volume.Name == "elastic-agent-state" {
+					hostPathType := corev1.HostPathDirectoryOrCreate
+					pod.Volumes[volumeIdx].VolumeSource.HostPath = &corev1.HostPathVolumeSource{
+						Type: &hostPathType,
+						Path: fmt.Sprintf("/var/lib/elastic-agent-standalone/%s/state", testNamespace),
+					}
+				}
+			}
+		})
+
+	// update the configmap to only run the connectors input
+	serviceAgentYAML, err := os.ReadFile(filepath.Join("testdata", "connectors.agent.yml"))
+	require.NoError(t, err)
+	for _, obj := range k8sObjects {
+		switch objWithType := obj.(type) {
+		case *corev1.ConfigMap:
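+			// swap the stock standalone config for the connectors-only
+			// policy; deployK8SAgent below then asserts that only the
+			// connectors-py component comes up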
+ _, ok := objWithType.Data["agent.yml"] + if ok { + objWithType.Data["agent.yml"] = string(serviceAgentYAML) + } + } + } + + ctx := context.Background() + + deployK8SAgent(t, ctx, client, k8sObjects, testNamespace, false, testLogsBasePath, map[string]bool{ + "connectors-py": true, + }) +} diff --git a/x-pack/agentbeat/testing/integration/kubernetes_agent_standalone_test.go b/x-pack/agentbeat/testing/integration/kubernetes_agent_standalone_test.go new file mode 100644 index 000000000000..ddcbb559cca1 --- /dev/null +++ b/x-pack/agentbeat/testing/integration/kubernetes_agent_standalone_test.go @@ -0,0 +1,977 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +//go:build integration + +package integration + +import ( + "bufio" + "bytes" + "context" + "crypto/sha256" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + + "io" + "os" + "path/filepath" + "regexp" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/elastic/go-elasticsearch/v8" + + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/serializer" + "k8s.io/apimachinery/pkg/util/yaml" + "k8s.io/client-go/kubernetes" + "sigs.k8s.io/e2e-framework/klient" + "sigs.k8s.io/e2e-framework/klient/k8s" + "sigs.k8s.io/kustomize/api/krusty" + "sigs.k8s.io/kustomize/kyaml/filesys" + + "helm.sh/helm/v3/pkg/action" + "helm.sh/helm/v3/pkg/chart/loader" + "helm.sh/helm/v3/pkg/cli" + + aclient "github.com/elastic/elastic-agent/pkg/control/v2/client" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools/fleettools" +) + +const ( + agentK8SKustomize = "../../deploy/kubernetes/elastic-agent-kustomize/default/elastic-agent-standalone" + agentK8SHelm = "../../deploy/helm/elastic-agent" +) + +var noSpecialCharsRegexp = regexp.MustCompile("[^a-zA-Z0-9]+") + +func TestKubernetesAgentStandaloneKustomize(t *testing.T) { + info := define.Require(t, define.Requirements{ + Stack: &define.Stack{}, + Local: false, + Sudo: false, + OS: []define.OS{ + // only test the basic and the wolfi container with otel + {Type: define.Kubernetes, DockerVariant: "basic"}, + {Type: define.Kubernetes, DockerVariant: "wolfi"}, + }, + Group: define.Kubernetes, + }) + + agentImage := os.Getenv("AGENT_IMAGE") + require.NotEmpty(t, agentImage, "AGENT_IMAGE must be set") + + client, err := info.KubeClient() + require.NoError(t, err) + require.NotNil(t, client) + + testLogsBasePath := os.Getenv("K8S_TESTS_POD_LOGS_BASE") + require.NotEmpty(t, testLogsBasePath, "K8S_TESTS_POD_LOGS_BASE must be set") + + err = os.MkdirAll(filepath.Join(testLogsBasePath, t.Name()), 0755) + require.NoError(t, err, "failed to create test logs directory") + + namespace := info.Namespace + + esHost := os.Getenv("ELASTICSEARCH_HOST") + require.NotEmpty(t, esHost, "ELASTICSEARCH_HOST must be set") + + esAPIKey, err := generateESAPIKey(info.ESClient, namespace) + require.NoError(t, err, "failed to generate ES API key") + require.NotEmpty(t, esAPIKey, "failed to generate ES API key") + + renderedManifest, err := 
renderKustomize(agentK8SKustomize) + require.NoError(t, err, "failed to render kustomize") + + testCases := []struct { + name string + runUser *int64 + runGroup *int64 + capabilitiesDrop []corev1.Capability + capabilitiesAdd []corev1.Capability + runK8SInnerTests bool + skipReason string + }{ + { + "default deployment - rootful agent", + nil, + nil, + nil, + nil, + false, + "", + }, + { + "drop ALL capabilities - rootful agent", + int64Ptr(0), + nil, + []corev1.Capability{"ALL"}, + []corev1.Capability{}, + false, + "", + }, + { + "drop ALL add CHOWN, SETPCAP capabilities - rootful agent", + int64Ptr(0), + nil, + []corev1.Capability{"ALL"}, + []corev1.Capability{"CHOWN", "SETPCAP"}, + true, + "", + }, + { + "drop ALL add CHOWN, SETPCAP capabilities - rootless agent", + int64Ptr(1000), // elastic-agent uid + nil, + []corev1.Capability{"ALL"}, + []corev1.Capability{"CHOWN", "SETPCAP", "DAC_READ_SEARCH", "SYS_PTRACE"}, + true, + "", + }, + { + "drop ALL add CHOWN, SETPCAP capabilities - rootless agent random uid:gid", + int64Ptr(500), + int64Ptr(500), + []corev1.Capability{"ALL"}, + []corev1.Capability{"CHOWN", "SETPCAP", "DAC_READ_SEARCH", "SYS_PTRACE"}, + true, + "", + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + if tc.skipReason != "" { + t.Skip(tc.skipReason) + } + + hasher := sha256.New() + hasher.Write([]byte(tc.name)) + testNamespace := strings.ToLower(base64.URLEncoding.EncodeToString(hasher.Sum(nil))) + testNamespace = noSpecialCharsRegexp.ReplaceAllString(testNamespace, "") + + k8sObjects, err := yamlToK8SObjects(bufio.NewReader(bytes.NewReader(renderedManifest))) + require.NoError(t, err, "failed to convert yaml to k8s objects") + + adjustK8SAgentManifests(k8sObjects, testNamespace, "elastic-agent-standalone", + func(container *corev1.Container) { + // set agent image + container.Image = agentImage + // set ImagePullPolicy to "Never" to avoid pulling the image + // as the image is already loaded by the kubernetes provisioner + container.ImagePullPolicy = "Never" + + container.Resources.Limits = corev1.ResourceList{ + corev1.ResourceMemory: resource.MustParse("800Mi"), + } + + if tc.capabilitiesDrop != nil || tc.capabilitiesAdd != nil || tc.runUser != nil || tc.runGroup != nil { + // set security context + container.SecurityContext = &corev1.SecurityContext{ + Capabilities: &corev1.Capabilities{ + Drop: tc.capabilitiesDrop, + Add: tc.capabilitiesAdd, + }, + RunAsUser: tc.runUser, + RunAsGroup: tc.runGroup, + } + + } + // set Elasticsearch host and API key + for idx, env := range container.Env { + if env.Name == "ES_HOST" { + container.Env[idx].Value = esHost + container.Env[idx].ValueFrom = nil + } + if env.Name == "API_KEY" { + container.Env[idx].Value = esAPIKey + container.Env[idx].ValueFrom = nil + } + } + }, + func(pod *corev1.PodSpec) { + for volumeIdx, volume := range pod.Volumes { + // need to update the volume path of the state directory + // to match the test namespace + if volume.Name == "elastic-agent-state" { + hostPathType := corev1.HostPathDirectoryOrCreate + pod.Volumes[volumeIdx].VolumeSource.HostPath = &corev1.HostPathVolumeSource{ + Type: &hostPathType, + Path: fmt.Sprintf("/var/lib/elastic-agent-standalone/%s/state", testNamespace), + } + } + } + }) + + ctx := context.Background() + + deployK8SAgent(t, ctx, client, k8sObjects, testNamespace, tc.runK8SInnerTests, testLogsBasePath, nil) + }) + } + +} + +func TestKubernetesAgentOtel(t *testing.T) { + info := define.Require(t, define.Requirements{ + Stack: 
&define.Stack{}, + Local: false, + Sudo: false, + OS: []define.OS{ + // only test the basic and the wolfi container with otel + {Type: define.Kubernetes, DockerVariant: "basic"}, + {Type: define.Kubernetes, DockerVariant: "wolfi"}, + }, + Group: define.Kubernetes, + }) + + agentImage := os.Getenv("AGENT_IMAGE") + require.NotEmpty(t, agentImage, "AGENT_IMAGE must be set") + + client, err := info.KubeClient() + require.NoError(t, err) + require.NotNil(t, client) + + testLogsBasePath := os.Getenv("K8S_TESTS_POD_LOGS_BASE") + require.NotEmpty(t, testLogsBasePath, "K8S_TESTS_POD_LOGS_BASE must be set") + + err = os.MkdirAll(filepath.Join(testLogsBasePath, t.Name()), 0755) + require.NoError(t, err, "failed to create test logs directory") + + namespace := info.Namespace + + esHost := os.Getenv("ELASTICSEARCH_HOST") + require.NotEmpty(t, esHost, "ELASTICSEARCH_HOST must be set") + + esAPIKey, err := generateESAPIKey(info.ESClient, namespace) + require.NoError(t, err, "failed to generate ES API key") + require.NotEmpty(t, esAPIKey, "failed to generate ES API key") + + renderedManifest, err := renderKustomize(agentK8SKustomize) + require.NoError(t, err, "failed to render kustomize") + + testCases := []struct { + name string + envAdd []corev1.EnvVar + runK8SInnerTests bool + componentPresence map[string]bool + }{ + + { + "run agent in otel mode", + []corev1.EnvVar{ + {Name: "ELASTIC_AGENT_OTEL", Value: "true"}, + }, + false, + map[string]bool{ + "beat/metrics-monitoring": false, + "filestream-monitoring": false, + "system/metrics-default": false, + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + hasher := sha256.New() + hasher.Write([]byte(tc.name)) + testNamespace := strings.ToLower(base64.URLEncoding.EncodeToString(hasher.Sum(nil))) + testNamespace = noSpecialCharsRegexp.ReplaceAllString(testNamespace, "") + + k8sObjects, err := yamlToK8SObjects(bufio.NewReader(bytes.NewReader(renderedManifest))) + require.NoError(t, err, "failed to convert yaml to k8s objects") + + adjustK8SAgentManifests(k8sObjects, testNamespace, "elastic-agent-standalone", + func(container *corev1.Container) { + // set agent image + container.Image = agentImage + // set ImagePullPolicy to "Never" to avoid pulling the image + // as the image is already loaded by the kubernetes provisioner + container.ImagePullPolicy = "Never" + + // set Elasticsearch host and API key + for idx, env := range container.Env { + if env.Name == "ES_HOST" { + container.Env[idx].Value = esHost + container.Env[idx].ValueFrom = nil + } + if env.Name == "API_KEY" { + container.Env[idx].Value = esAPIKey + container.Env[idx].ValueFrom = nil + } + } + + if len(tc.envAdd) > 0 { + container.Env = append(container.Env, tc.envAdd...) 
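+					// for the test case above this injects ELASTIC_AGENT_OTEL=true,
+					// the switch that starts the container in otel mode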
+ } + + // drop arguments overriding default config + container.Args = []string{} + }, + func(pod *corev1.PodSpec) { + for volumeIdx, volume := range pod.Volumes { + // need to update the volume path of the state directory + // to match the test namespace + if volume.Name == "elastic-agent-state" { + hostPathType := corev1.HostPathDirectoryOrCreate + pod.Volumes[volumeIdx].VolumeSource.HostPath = &corev1.HostPathVolumeSource{ + Type: &hostPathType, + Path: fmt.Sprintf("/var/lib/elastic-agent-standalone/%s/state", testNamespace), + } + } + } + }) + + ctx := context.Background() + + deployK8SAgent(t, ctx, client, k8sObjects, testNamespace, tc.runK8SInnerTests, testLogsBasePath, tc.componentPresence) + }) + } +} + +func TestKubernetesAgentHelm(t *testing.T) { + info := define.Require(t, define.Requirements{ + Stack: &define.Stack{}, + Local: false, + Sudo: false, + OS: []define.OS{ + // only test the basic and the wolfi container with otel + {Type: define.Kubernetes, DockerVariant: "basic"}, + {Type: define.Kubernetes, DockerVariant: "wolfi"}, + }, + Group: define.Kubernetes, + }) + + agentImage := os.Getenv("AGENT_IMAGE") + require.NotEmpty(t, agentImage, "AGENT_IMAGE must be set") + + agentImageParts := strings.SplitN(agentImage, ":", 2) + require.Len(t, agentImageParts, 2, "AGENT_IMAGE must be in the form ':'") + agentImageRepo := agentImageParts[0] + agentImageTag := agentImageParts[1] + + client, err := info.KubeClient() + require.NoError(t, err) + require.NotNil(t, client) + + testLogsBasePath := os.Getenv("K8S_TESTS_POD_LOGS_BASE") + require.NotEmpty(t, testLogsBasePath, "K8S_TESTS_POD_LOGS_BASE must be set") + + err = os.MkdirAll(filepath.Join(testLogsBasePath, t.Name()), 0755) + require.NoError(t, err, "failed to create test logs directory") + + namespace := info.Namespace + + esHost := os.Getenv("ELASTICSEARCH_HOST") + require.NotEmpty(t, esHost, "ELASTICSEARCH_HOST must be set") + + esAPIKey, err := generateESAPIKey(info.ESClient, namespace) + require.NoError(t, err, "failed to generate ES API key") + require.NotEmpty(t, esAPIKey, "failed to generate ES API key") + + enrollParams, err := fleettools.NewEnrollParams(context.Background(), info.KibanaClient) + require.NoError(t, err, "failed to create fleet enroll params") + + testCases := []struct { + name string + values map[string]any + atLeastValidatedPodsNumber int + runK8SInnerTests bool + }{ + { + name: "helm standalone agent default kubernetes privileged", + values: map[string]any{ + "kubernetes": map[string]any{ + "enabled": true, + }, + "agent": map[string]any{ + "unprivileged": false, + "image": map[string]any{ + "repository": agentImageRepo, + "tag": agentImageTag, + "pullPolicy": "Never", + }, + }, + "outputs": map[string]any{ + "default": map[string]any{ + "type": "ESPlainAuthAPI", + "url": esHost, + "api_key": esAPIKey, + }, + }, + }, + runK8SInnerTests: true, + // - perNode Daemonset (at least 1 agent pod) + // - clusterWide Deployment (1 agent pod) + // - ksmSharded Statefulset (1 agent pod) + atLeastValidatedPodsNumber: 3, + }, + { + name: "helm standalone agent default kubernetes unprivileged", + values: map[string]any{ + "kubernetes": map[string]any{ + "enabled": true, + }, + "agent": map[string]any{ + "unprivileged": true, + "image": map[string]any{ + "repository": agentImageRepo, + "tag": agentImageTag, + "pullPolicy": "Never", + }, + }, + "outputs": map[string]any{ + "default": map[string]any{ + "type": "ESPlainAuthAPI", + "url": esHost, + "api_key": esAPIKey, + }, + }, + }, + runK8SInnerTests: true, + // - 
perNode Daemonset (at least 1 agent pod) + // - clusterWide Deployment (1 agent pod) + // - ksmSharded Statefulset (1 agent pod) + atLeastValidatedPodsNumber: 3, + }, + { + name: "helm managed agent default kubernetes privileged", + values: map[string]any{ + "agent": map[string]any{ + "unprivileged": false, + "image": map[string]any{ + "repository": agentImageRepo, + "tag": agentImageTag, + "pullPolicy": "Never", + }, + "fleet": map[string]any{ + "enabled": true, + "url": enrollParams.FleetURL, + "token": enrollParams.EnrollmentToken, + "preset": "perNode", + }, + }, + }, + runK8SInnerTests: true, + // - perNode Daemonset (at least 1 agent pod) + atLeastValidatedPodsNumber: 1, + }, + { + name: "helm managed agent default kubernetes unprivileged", + values: map[string]any{ + "agent": map[string]any{ + "unprivileged": true, + "image": map[string]any{ + "repository": agentImageRepo, + "tag": agentImageTag, + "pullPolicy": "Never", + }, + "fleet": map[string]any{ + "enabled": true, + "url": enrollParams.FleetURL, + "token": enrollParams.EnrollmentToken, + "preset": "perNode", + }, + }, + }, + runK8SInnerTests: true, + // - perNode Daemonset (at least 1 agent pod) + atLeastValidatedPodsNumber: 1, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ctx := context.Background() + hasher := sha256.New() + hasher.Write([]byte(tc.name)) + testNamespace := strings.ToLower(base64.URLEncoding.EncodeToString(hasher.Sum(nil))) + testNamespace = noSpecialCharsRegexp.ReplaceAllString(testNamespace, "") + + settings := cli.New() + settings.SetNamespace(testNamespace) + actionConfig := &action.Configuration{} + + helmChart, err := loader.Load(agentK8SHelm) + require.NoError(t, err, "failed to load helm chart") + + err = actionConfig.Init(settings.RESTClientGetter(), settings.Namespace(), "", + func(format string, v ...interface{}) {}) + require.NoError(t, err, "failed to init helm action config") + + helmValues := tc.values + + t.Cleanup(func() { + if t.Failed() { + dumpLogs(t, ctx, client, testNamespace, testLogsBasePath) + } + + uninstallAction := action.NewUninstall(actionConfig) + uninstallAction.Wait = true + + _, err = uninstallAction.Run("helm-agent") + if err != nil { + require.NoError(t, err, "failed to uninstall helm chart") + } + }) + + installAction := action.NewInstall(actionConfig) + installAction.Namespace = testNamespace + installAction.CreateNamespace = true + installAction.UseReleaseName = true + installAction.ReleaseName = "helm-agent" + installAction.Timeout = 2 * time.Minute + installAction.Wait = true + installAction.WaitForJobs = true + _, err = installAction.Run(helmChart, helmValues) + require.NoError(t, err, "failed to install helm chart") + + podList := &corev1.PodList{} + err = client.Resources(testNamespace).List(ctx, podList) + require.NoError(t, err, fmt.Sprintf("failed to list pods in namespace %s", testNamespace)) + + checkedAgentContainers := 0 + + for _, pod := range podList.Items { + if !strings.HasPrefix(pod.GetName(), "agent-") { + continue + } + + command := []string{"elastic-agent", "status"} + var stdout, stderr bytes.Buffer + var agentHealthyErr error + // we will wait maximum 120 seconds for the agent to report healthy + for i := 0; i < 120; i++ { + stdout.Reset() + stderr.Reset() + agentHealthyErr = client.Resources().ExecInPod(ctx, testNamespace, pod.Name, "agent", command, &stdout, &stderr) + if agentHealthyErr == nil { + break + } + time.Sleep(time.Second * 1) + } + + statusString := stdout.String() + if agentHealthyErr != nil { + 
t.Errorf("elastic-agent never reported healthy: %v", agentHealthyErr)
+				t.Logf("stdout: %s\n", statusString)
+				t.Logf("stderr: %s\n", stderr.String())
+				t.FailNow()
+				return
+			}
+
+			stdout.Reset()
+			stderr.Reset()
+
+			if tc.runK8SInnerTests {
+				err := client.Resources().ExecInPod(ctx, testNamespace, pod.Name, "agent",
+					[]string{"/usr/share/elastic-agent/k8s-inner-tests", "-test.v"}, &stdout, &stderr)
+				t.Log(stdout.String())
+				if err != nil {
+					t.Log(stderr.String())
+				}
+				require.NoError(t, err, "error at k8s inner tests execution")
+			}
+
+			checkedAgentContainers++
+		}
+
+		require.GreaterOrEqual(t, checkedAgentContainers, tc.atLeastValidatedPodsNumber,
+			fmt.Sprintf("at least %d agent containers should be checked", tc.atLeastValidatedPodsNumber))
+		})
+	}
+}
+
+// deployK8SAgent is a helper function to deploy the elastic-agent in k8s and invoke the inner k8s tests if
+// runK8SInnerTests is true
+func deployK8SAgent(t *testing.T, ctx context.Context, client klient.Client, objects []k8s.Object, namespace string,
+	runInnerK8STests bool, testLogsBasePath string, componentPresence map[string]bool) {
+
+	objects = append([]k8s.Object{&corev1.Namespace{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: namespace,
+		},
+	}}, objects...)
+
+	t.Cleanup(func() {
+		if t.Failed() {
+			dumpLogs(t, ctx, client, namespace, testLogsBasePath)
+		}
+
+		// need to delete all k8s objects and wait for it as elastic-agent
+		// in k8s creates cluster-wide roles and having multiple of them at
+		// the same time isn't allowed
+		deleteK8SObjects(t, ctx, client, objects, true)
+	})
+
+	// Create the objects
+	for _, obj := range objects {
+		obj.SetNamespace(namespace)
+		err := client.Resources(namespace).Create(ctx, obj)
+		require.NoError(t, err, fmt.Sprintf("failed to create object %s", obj.GetName()))
+	}
+
+	var agentPodName string
+	// Wait for pods to be ready
+	require.Eventually(t, func() bool {
+		podList := &corev1.PodList{}
+		err := client.Resources(namespace).List(ctx, podList)
+		require.NoError(t, err, fmt.Sprintf("failed to list pods in namespace %s", namespace))
+
+		for _, pod := range podList.Items {
+			if agentPodName == "" && strings.HasPrefix(pod.GetName(), "elastic-agent-standalone") {
+				agentPodName = pod.Name
+			}
+
+			for _, containerStatus := range pod.Status.ContainerStatuses {
+				if containerStatus.RestartCount > 0 {
+					return false
+				}
+			}
+
+			for _, cond := range pod.Status.Conditions {
+				if cond.Type != corev1.PodReady {
+					continue
+				}
+
+				if cond.Status != corev1.ConditionTrue {
+					return false
+				}
+			}
+		}
+
+		return true
+	}, time.Second*100, time.Second*1, fmt.Sprintf("pods in namespace %s never became ready", namespace))
+
+	require.NotEmpty(t, agentPodName, "agent pod name is empty")
+
+	command := []string{"elastic-agent", "status", "--output=json"}
+	var status atesting.AgentStatusOutput
+	var stdout, stderr bytes.Buffer
+	var agentHealthyErr error
+	// we will wait maximum 120 seconds for the agent to report healthy
+	for i := 0; i < 120; i++ {
+		status = atesting.AgentStatusOutput{} // clear status output
+		stdout.Reset()
+		stderr.Reset()
+		agentHealthyErr = client.Resources().ExecInPod(ctx, namespace, agentPodName, "elastic-agent-standalone", command, &stdout, &stderr)
+		if agentHealthyErr == nil {
+			if uerr := json.Unmarshal(stdout.Bytes(), &status); uerr == nil {
+				if status.State == int(aclient.Healthy) {
+					// agent is healthy, inner tests should now pass
+					if runInnerK8STests {
+						err := client.Resources().ExecInPod(ctx, namespace, agentPodName, "elastic-agent-standalone",
+							
[]string{"/usr/share/elastic-agent/k8s-inner-tests", "-test.v"}, &stdout, &stderr) + t.Log(stdout.String()) + if err != nil { + t.Log(stderr.String()) + } + require.NoError(t, err, "error at k8s inner tests execution") + } + + // validate that the components defined are also healthy if they should exist + componentsCorrect := true + for component, shouldBePresent := range componentPresence { + compState, ok := getComponentState(status, component) + if shouldBePresent { + if !ok { + // doesn't exist + componentsCorrect = false + } else if compState != int(aclient.Healthy) { + // not healthy + componentsCorrect = false + } + } else if ok { + // should not be present + // break instantly and fail (as it existing should never happen) + break + } + } + if componentsCorrect { + // agent health and components are correct + return + } + } + } + } + time.Sleep(time.Second * 1) + } + + t.Errorf("elastic-agent never reported healthy: %+v", status) + t.Logf("stdout: %s\n", stdout.String()) + t.Logf("stderr: %s\n", stderr.String()) + t.FailNow() +} + +func getComponentState(status atesting.AgentStatusOutput, componentName string) (int, bool) { + for _, comp := range status.Components { + if comp.Name == componentName { + return comp.State, true + } + } + return -1, false +} + +// dumpLogs dumps the logs of all pods in the given namespace to the given target directory +func dumpLogs(t *testing.T, ctx context.Context, client klient.Client, namespace string, targetDir string) { + + podList := &corev1.PodList{} + + clientSet, err := kubernetes.NewForConfig(client.RESTConfig()) + if err != nil { + t.Logf("Error creating clientset: %v\n", err) + return + } + + err = client.Resources(namespace).List(ctx, podList) + if err != nil { + t.Logf("Error listing pods: %v\n", err) + return + } + + for _, pod := range podList.Items { + + previous := false + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.RestartCount > 0 { + previous = true + break + } + } + + for _, container := range pod.Spec.Containers { + logFilePath := filepath.Join(targetDir, fmt.Sprintf("%s-%s-%s.log", t.Name(), pod.Name, container.Name)) + logFile, err := os.Create(logFilePath) + if err != nil { + t.Logf("Error creating log file: %v\n", err) + continue + } + + req := clientSet.CoreV1().Pods(namespace).GetLogs(pod.Name, &corev1.PodLogOptions{ + Container: container.Name, + Previous: previous, + }) + podLogsStream, err := req.Stream(context.TODO()) + if err != nil { + t.Logf("Error getting container %s of pod %s logs: %v\n", container.Name, pod.Name, err) + continue + } + + _, err = io.Copy(logFile, podLogsStream) + if err != nil { + t.Logf("Error writing container %s of pod %s logs: %v\n", container.Name, pod.Name, err) + } else { + t.Logf("Wrote container %s of pod %s logs to %s\n", container.Name, pod.Name, logFilePath) + } + + _ = podLogsStream.Close() + } + } +} + +// adjustK8SAgentManifests adjusts the namespace of given k8s objects and calls the given callbacks for the containers and the pod +func adjustK8SAgentManifests(objects []k8s.Object, namespace string, containerName string, cbContainer func(container *corev1.Container), cbPod func(pod *corev1.PodSpec)) { + // Update the agent image and image pull policy as it is already loaded in kind cluster + for _, obj := range objects { + obj.SetNamespace(namespace) + var podSpec *corev1.PodSpec + switch objWithType := obj.(type) { + case *appsv1.DaemonSet: + podSpec = &objWithType.Spec.Template.Spec + case *appsv1.StatefulSet: + podSpec = 
&objWithType.Spec.Template.Spec + case *appsv1.Deployment: + podSpec = &objWithType.Spec.Template.Spec + case *appsv1.ReplicaSet: + podSpec = &objWithType.Spec.Template.Spec + case *batchv1.Job: + podSpec = &objWithType.Spec.Template.Spec + case *batchv1.CronJob: + podSpec = &objWithType.Spec.JobTemplate.Spec.Template.Spec + case *rbacv1.ClusterRoleBinding: + for idx, subject := range objWithType.Subjects { + if strings.HasPrefix(subject.Name, "elastic-agent") { + objWithType.Subjects[idx].Namespace = namespace + } + } + continue + case *rbacv1.RoleBinding: + for idx, subject := range objWithType.Subjects { + if strings.HasPrefix(subject.Name, "elastic-agent") { + objWithType.Subjects[idx].Namespace = namespace + } + } + continue + default: + continue + } + + for idx, container := range podSpec.Containers { + if container.Name != containerName { + continue + } + if cbContainer != nil { + cbContainer(&podSpec.Containers[idx]) + } + + if cbPod != nil { + cbPod(podSpec) + } + } + + } +} + +// yamlToK8SObjects converts yaml to k8s objects +func yamlToK8SObjects(reader *bufio.Reader) ([]k8s.Object, error) { + var objects []k8s.Object + + scheme := runtime.NewScheme() + scheme.AddKnownTypes(rbacv1.SchemeGroupVersion, &rbacv1.ClusterRoleBinding{}, &rbacv1.ClusterRoleBindingList{}) + scheme.AddKnownTypes(rbacv1.SchemeGroupVersion, &rbacv1.ClusterRole{}, &rbacv1.ClusterRoleList{}) + scheme.AddKnownTypes(rbacv1.SchemeGroupVersion, &rbacv1.RoleBinding{}, &rbacv1.RoleBindingList{}) + scheme.AddKnownTypes(rbacv1.SchemeGroupVersion, &rbacv1.Role{}, &rbacv1.RoleList{}) + scheme.AddKnownTypes(corev1.SchemeGroupVersion, &corev1.ServiceAccount{}, &corev1.ServiceAccountList{}) + scheme.AddKnownTypes(corev1.SchemeGroupVersion, &corev1.Service{}, &corev1.ServiceList{}) + scheme.AddKnownTypes(appsv1.SchemeGroupVersion, &appsv1.DaemonSet{}) + scheme.AddKnownTypes(appsv1.SchemeGroupVersion, &appsv1.StatefulSet{}) + scheme.AddKnownTypes(appsv1.SchemeGroupVersion, &appsv1.Deployment{}) + scheme.AddKnownTypes(corev1.SchemeGroupVersion, &corev1.Secret{}, &corev1.ConfigMap{}) + decoder := serializer.NewCodecFactory(scheme).UniversalDeserializer() + + yamlReader := yaml.NewYAMLReader(reader) + for { + yamlBytes, err := yamlReader.Read() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, fmt.Errorf("failed to read YAML: %w", err) + } + obj, _, err := decoder.Decode(yamlBytes, nil, nil) + if err != nil { + return nil, fmt.Errorf("failed to decode YAML: %w", err) + } + + k8sObj, ok := obj.(k8s.Object) + if !ok { + return nil, fmt.Errorf("failed to cast object to k8s.Object: %v", obj) + } + + objects = append(objects, k8sObj) + } + + return objects, nil +} + +// renderKustomize renders the given kustomize directory to YAML +func renderKustomize(kustomizePath string) ([]byte, error) { + // Create a file system pointing to the kustomize directory + fSys := filesys.MakeFsOnDisk() + + // Create a kustomizer + k := krusty.MakeKustomizer(krusty.MakeDefaultOptions()) + + // Run the kustomizer on the given directory + resMap, err := k.Run(fSys, kustomizePath) + if err != nil { + return nil, err + } + + // Convert the result to YAML + renderedManifest, err := resMap.AsYaml() + if err != nil { + return nil, err + } + + return renderedManifest, nil +} + +// generateESAPIKey generates an API key for the given Elasticsearch. 
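+// The key is returned as "id:token", the form the manifests above inject via
+// the API_KEY environment variable. Typical call, as used in these tests:
+//
+//	esAPIKey, err := generateESAPIKey(info.ESClient, info.Namespace)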
+func generateESAPIKey(esClient *elasticsearch.Client, keyName string) (string, error) { + apiKeyReqBody := fmt.Sprintf(`{ + "name": "%s", + "expiration": "1d" + }`, keyName) + + resp, err := esClient.Security.CreateAPIKey(strings.NewReader(apiKeyReqBody)) + if err != nil { + return "", err + } + defer resp.Body.Close() + + response := make(map[string]interface{}) + err = json.NewDecoder(resp.Body).Decode(&response) + if err != nil { + return "", err + } + + keyToken := response["api_key"].(string) + if keyToken == "" { + return "", fmt.Errorf("key token is empty") + } + + keyID := response["id"].(string) + if keyID == "" { + return "", fmt.Errorf("key ID is empty") + } + + return fmt.Sprintf("%s:%s", keyID, keyToken), nil +} + +// deleteK8SObjects deletes the given k8s objects and waits for them to be deleted if wait is true. +func deleteK8SObjects(t *testing.T, ctx context.Context, client klient.Client, objects []k8s.Object, wait bool) { + for _, obj := range objects { + _ = client.Resources().Delete(ctx, obj) + } + + if !wait { + return + } + + timeoutCtx, timeoutCancel := context.WithTimeout(ctx, 10*time.Second) + defer timeoutCancel() + + for _, obj := range objects { + if timeoutCtx.Err() != nil { + break + } + + for i := 0; i < 10; i++ { + if timeoutCtx.Err() != nil { + break + } + + err := client.Resources().Get(timeoutCtx, obj.GetName(), obj.GetNamespace(), obj) + if err != nil { + break + } + time.Sleep(500 * time.Millisecond) + } + } + + if timeoutCtx.Err() != nil { + t.Log("Timeout waiting for k8s objects to be deleted") + } +} + +func int64Ptr(val int64) *int64 { + valPtr := val + return &valPtr +} diff --git a/x-pack/agentbeat/testing/integration/linux_deb_test.go b/x-pack/agentbeat/testing/integration/linux_deb_test.go new file mode 100644 index 000000000000..a7de3170ff07 --- /dev/null +++ b/x-pack/agentbeat/testing/integration/linux_deb_test.go @@ -0,0 +1,194 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +//go:build integration + +package integration + +import ( + "context" + "fmt" + "os/exec" + "strings" + "testing" + "time" + + "github.com/gofrs/uuid/v5" + + "github.com/elastic/elastic-agent-libs/kibana" + + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools" + "github.com/elastic/elastic-agent/pkg/testing/tools/check" + "github.com/elastic/elastic-agent/pkg/testing/tools/fleettools" + "github.com/elastic/elastic-agent/pkg/testing/tools/testcontext" + "github.com/elastic/elastic-agent/testing/upgradetest" + + "github.com/stretchr/testify/require" +) + +func TestDebLogIngestFleetManaged(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Deb, + Stack: &define.Stack{}, + OS: []define.OS{ + { + Type: define.Linux, + Distro: "ubuntu", + }, + }, + Local: false, + Sudo: true, + }) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + agentFixture, err := define.NewFixtureFromLocalBuild(t, define.Version(), atesting.WithPackageFormat("deb")) + require.NoError(t, err) + + // 1. Create a policy in Fleet with monitoring enabled. 
+ // To ensure there are no conflicts with previous test runs against + // the same ESS stack, we add the current time at the end of the policy + // name. This policy does not contain any integration. + t.Log("Enrolling agent in Fleet with a test policy") + createPolicyReq := kibana.AgentPolicy{ + Name: fmt.Sprintf("test-policy-enroll-%s", uuid.Must(uuid.NewV4()).String()), + Namespace: info.Namespace, + Description: "test policy for agent enrollment", + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + AgentFeatures: []map[string]interface{}{ + { + "name": "test_enroll", + "enabled": true, + }, + }, + } + + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Force: true, + } + + // 2. Install the Elastic-Agent with the policy that + // was just created. + policy, err := tools.InstallAgentWithPolicy( + ctx, + t, + installOpts, + agentFixture, + info.KibanaClient, + createPolicyReq) + require.NoError(t, err) + t.Logf("created policy: %s", policy.ID) + check.ConnectedToFleet(ctx, t, agentFixture, 5*time.Minute) + + t.Run("Monitoring logs are shipped", func(t *testing.T) { + testMonitoringLogsAreShipped(t, ctx, info, agentFixture, policy) + }) + + t.Run("Normal logs with flattened data_stream are shipped", func(t *testing.T) { + testFlattenedDatastreamFleetPolicy(t, ctx, info, policy) + }) +} + +func TestDebFleetUpgrade(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Deb, + Stack: &define.Stack{}, + OS: []define.OS{ + { + Type: define.Linux, + Distro: "ubuntu", + }, + }, + Local: false, + Sudo: true, + }) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + // start from previous minor + upgradeFromVersion, err := upgradetest.PreviousMinor() + require.NoError(t, err) + startFixture, err := atesting.NewFixture( + t, + upgradeFromVersion.String(), + atesting.WithFetcher(atesting.ArtifactFetcher()), + atesting.WithPackageFormat("deb"), + ) + require.NoError(t, err) + + // end on the current build with deb + endFixture, err := define.NewFixtureFromLocalBuild(t, define.Version(), atesting.WithPackageFormat("deb")) + require.NoError(t, err) + + // 1. Create a policy in Fleet with monitoring enabled. + // To ensure there are no conflicts with previous test runs against + // the same ESS stack, we add the current time at the end of the policy + // name. This policy does not contain any integration. + t.Log("Enrolling agent in Fleet with a test policy") + createPolicyReq := kibana.AgentPolicy{ + Name: fmt.Sprintf("test-policy-enroll-%s", uuid.Must(uuid.NewV4()).String()), + Namespace: info.Namespace, + Description: "test policy for agent enrollment", + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + AgentFeatures: []map[string]interface{}{ + { + "name": "test_enroll", + "enabled": true, + }, + }, + } + + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Force: true, + } + + // 2. Install the Elastic-Agent with the policy that + // was just created. + policy, err := tools.InstallAgentWithPolicy( + ctx, + t, + installOpts, + startFixture, + info.KibanaClient, + createPolicyReq) + require.NoError(t, err) + t.Logf("created policy: %s", policy.ID) + check.ConnectedToFleet(ctx, t, startFixture, 5*time.Minute) + + // 3. 
Upgrade deb to the build version + srcPackage, err := endFixture.SrcPackage(ctx) + require.NoError(t, err) + cmd := exec.CommandContext(ctx, "sudo", "apt-get", "install", "-y", "-qq", "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", srcPackage) + cmd.Env = append(cmd.Env, "DEBIAN_FRONTEND=noninteractive") + out, err := cmd.CombinedOutput() // #nosec G204 -- Need to pass in name of package + require.NoError(t, err, string(out)) + + // 4. Wait for version in Fleet to match + // Fleet will not include the `-SNAPSHOT` in the `GetAgentVersion` result + noSnapshotVersion := strings.TrimSuffix(define.Version(), "-SNAPSHOT") + require.Eventually(t, func() bool { + newVersion, err := fleettools.GetAgentVersion(ctx, info.KibanaClient, policy.ID) + if err != nil { + t.Logf("error getting agent version: %v", err) + return false + } + if noSnapshotVersion == newVersion { + return true + } + t.Logf("Got Agent version %s != %s", newVersion, noSnapshotVersion) + return false + }, 5*time.Minute, time.Second) +} diff --git a/x-pack/agentbeat/testing/integration/linux_rpm_test.go b/x-pack/agentbeat/testing/integration/linux_rpm_test.go new file mode 100644 index 000000000000..b76313cd33b6 --- /dev/null +++ b/x-pack/agentbeat/testing/integration/linux_rpm_test.go @@ -0,0 +1,194 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +//go:build integration + +package integration + +import ( + "context" + "fmt" + "os/exec" + "strings" + "testing" + "time" + + "github.com/elastic/elastic-agent/pkg/testing/tools/fleettools" + "github.com/elastic/elastic-agent/testing/upgradetest" + + "github.com/gofrs/uuid/v5" + + "github.com/elastic/elastic-agent-libs/kibana" + + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools" + "github.com/elastic/elastic-agent/pkg/testing/tools/check" + "github.com/elastic/elastic-agent/pkg/testing/tools/testcontext" + + "github.com/stretchr/testify/require" +) + +func TestRpmLogIngestFleetManaged(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: RPM, + Stack: &define.Stack{}, + OS: []define.OS{ + { + Type: define.Linux, + Distro: "rhel", + }, + }, + Local: false, + Sudo: true, + }) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + agentFixture, err := define.NewFixtureFromLocalBuild(t, define.Version(), atesting.WithPackageFormat("rpm")) + require.NoError(t, err) + + // 1. Create a policy in Fleet with monitoring enabled. + // To ensure there are no conflicts with previous test runs against + // the same ESS stack, we add the current time at the end of the policy + // name. This policy does not contain any integration. 
+	t.Log("Enrolling agent in Fleet with a test policy")
+	createPolicyReq := kibana.AgentPolicy{
+		Name: fmt.Sprintf("test-policy-enroll-%s", uuid.Must(uuid.NewV4()).String()),
+		Namespace: info.Namespace,
+		Description: "test policy for agent enrollment",
+		MonitoringEnabled: []kibana.MonitoringEnabledOption{
+			kibana.MonitoringEnabledLogs,
+			kibana.MonitoringEnabledMetrics,
+		},
+		AgentFeatures: []map[string]interface{}{
+			{
+				"name": "test_enroll",
+				"enabled": true,
+			},
+		},
+	}
+
+	installOpts := atesting.InstallOpts{
+		NonInteractive: true,
+		Force: true,
+	}
+
+	// 2. Install the Elastic-Agent with the policy that
+	// was just created.
+	policy, err := tools.InstallAgentWithPolicy(
+		ctx,
+		t,
+		installOpts,
+		agentFixture,
+		info.KibanaClient,
+		createPolicyReq)
+	require.NoError(t, err)
+	t.Logf("created policy: %s", policy.ID)
+	check.ConnectedToFleet(ctx, t, agentFixture, 5*time.Minute)
+
+	t.Run("Monitoring logs are shipped", func(t *testing.T) {
+		testMonitoringLogsAreShipped(t, ctx, info, agentFixture, policy)
+	})
+
+	t.Run("Normal logs with flattened data_stream are shipped", func(t *testing.T) {
+		testFlattenedDatastreamFleetPolicy(t, ctx, info, policy)
+	})
+}
+
+func TestRpmFleetUpgrade(t *testing.T) {
+	info := define.Require(t, define.Requirements{
+		Group: RPM,
+		Stack: &define.Stack{},
+		OS: []define.OS{
+			{
+				Type: define.Linux,
+				Distro: "rhel",
+			},
+		},
+		Local: false,
+		Sudo: true,
+	})
+
+	ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute))
+	defer cancel()
+
+	// start from the previous minor release of the rpm
+	upgradeFromVersion, err := upgradetest.PreviousMinor()
+	require.NoError(t, err)
+	startFixture, err := atesting.NewFixture(
+		t,
+		upgradeFromVersion.String(),
+		atesting.WithFetcher(atesting.ArtifactFetcher()),
+		atesting.WithPackageFormat("rpm"),
+	)
+	require.NoError(t, err)
+
+	// end on the current build with rpm
+	endFixture, err := define.NewFixtureFromLocalBuild(t, define.Version(), atesting.WithPackageFormat("rpm"))
+	require.NoError(t, err)
+
+	// 1. Create a policy in Fleet with monitoring enabled.
+	// To ensure there are no conflicts with previous test runs against
+	// the same ESS stack, we add a unique UUID at the end of the policy
+	// name. This policy does not contain any integration.
+	t.Log("Enrolling agent in Fleet with a test policy")
+	createPolicyReq := kibana.AgentPolicy{
+		Name: fmt.Sprintf("test-policy-enroll-%s", uuid.Must(uuid.NewV4()).String()),
+		Namespace: info.Namespace,
+		Description: "test policy for agent enrollment",
+		MonitoringEnabled: []kibana.MonitoringEnabledOption{
+			kibana.MonitoringEnabledLogs,
+			kibana.MonitoringEnabledMetrics,
+		},
+		AgentFeatures: []map[string]interface{}{
+			{
+				"name": "test_enroll",
+				"enabled": true,
+			},
+		},
+	}
+
+	installOpts := atesting.InstallOpts{
+		NonInteractive: true,
+		Force: true,
+	}
+
+	// 2. Install the Elastic-Agent with the policy that
+	// was just created.
+	policy, err := tools.InstallAgentWithPolicy(
+		ctx,
+		t,
+		installOpts,
+		startFixture,
+		info.KibanaClient,
+		createPolicyReq)
+	require.NoError(t, err)
+	t.Logf("created policy: %s", policy.ID)
+	check.ConnectedToFleet(ctx, t, startFixture, 5*time.Minute)
+
+	// 3. Upgrade rpm to the build version
+	srcPackage, err := endFixture.SrcPackage(ctx)
+	require.NoError(t, err)
+	out, err := exec.CommandContext(ctx, "sudo", "rpm", "-U", "-v", srcPackage).CombinedOutput() // #nosec G204 -- Need to pass in name of package
+	require.NoError(t, err, string(out))
+
+	// 4. 
Wait for version in Fleet to match + // Fleet will not include the `-SNAPSHOT` in the `GetAgentVersion` result + noSnapshotVersion := strings.TrimSuffix(define.Version(), "-SNAPSHOT") + require.Eventually(t, func() bool { + t.Log("Getting Agent version...") + newVersion, err := fleettools.GetAgentVersion(ctx, info.KibanaClient, policy.ID) + if err != nil { + t.Logf("error getting agent version: %v", err) + return false + } + if noSnapshotVersion == newVersion { + return true + } + t.Logf("Got Agent version %s != %s", newVersion, noSnapshotVersion) + return false + }, 5*time.Minute, time.Second) +} diff --git a/x-pack/agentbeat/testing/integration/log_level_test.go b/x-pack/agentbeat/testing/integration/log_level_test.go new file mode 100644 index 000000000000..ac77e9c77a0d --- /dev/null +++ b/x-pack/agentbeat/testing/integration/log_level_test.go @@ -0,0 +1,392 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +//go:build integration + +package integration + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "testing" + "text/template" + "time" + + "github.com/gofrs/uuid/v5" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/elastic/elastic-agent-libs/kibana" + "github.com/elastic/elastic-agent-libs/logp" + "github.com/elastic/elastic-agent/pkg/control/v2/cproto" + "github.com/elastic/elastic-agent/pkg/core/logger" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools/fleettools" + "github.com/elastic/elastic-agent/pkg/testing/tools/testcontext" + "github.com/elastic/elastic-agent/pkg/utils" +) + +func TestSetLogLevelFleetManaged(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Fleet, + Stack: &define.Stack{}, + Sudo: true, + }) + + deadline := time.Now().Add(10 * time.Minute) + ctx, cancel := testcontext.WithDeadline(t, context.Background(), deadline) + defer cancel() + + f, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err, "failed creating agent fixture") + + policyResp, enrollmentTokenResp := createPolicyAndEnrollmentToken(ctx, t, info.KibanaClient, createBasicPolicy()) + t.Logf("Created policy %+v", policyResp.AgentPolicy) + + t.Log("Getting default Fleet Server URL...") + fleetServerURL, err := fleettools.DefaultURL(ctx, info.KibanaClient) + require.NoError(t, err, "failed getting Fleet Server URL") + + installOutput, err := f.Install(ctx, &atesting.InstallOpts{ + NonInteractive: true, + Force: true, + EnrollOpts: atesting.EnrollOpts{ + URL: fleetServerURL, + EnrollmentToken: enrollmentTokenResp.APIKey, + }, + }) + + assert.NoErrorf(t, err, "Error installing agent. 
Install output:\n%s\n", string(installOutput)) + + require.Eventuallyf(t, func() bool { + return waitForAgentAndFleetHealthy(ctx, t, f) + }, time.Minute, time.Second, "agent never became healthy or connected to Fleet") + + // get the agent ID + agentID, err := getAgentID(ctx, f) + require.NoError(t, err, "error getting the agent ID") + + testLogLevelSetViaFleet(ctx, f, agentID, t, info, policyResp) +} + +func testLogLevelSetViaFleet(ctx context.Context, f *atesting.Fixture, agentID string, t *testing.T, info *define.Info, policyResp kibana.PolicyResponse) { + + // Step 0: get the initial log level reported by agent + initialLogLevel, err := getLogLevelFromInspectOutput(ctx, f) + require.NoError(t, err, "error retrieving agent log level") + assert.Equal(t, logger.DefaultLogLevel.String(), initialLogLevel, "unexpected default log level at agent startup") + + // Step 1: set a different log level in Fleet policy + policyLogLevel := logp.ErrorLevel + + t.Logf("Setting policy log level to %q", policyLogLevel.String()) + // make sure we are changing something + require.NotEqualf(t, logger.DefaultLogLevel, policyLogLevel, "Policy log level %s should be different than agent default log level", policyLogLevel) + // set policy log level and verify that eventually the agent sets it + err = updatePolicyLogLevel(ctx, t, info.KibanaClient, policyResp.AgentPolicy, policyLogLevel.String()) + require.NoError(t, err, "error updating policy log level") + + // assert `elastic-agent inspect` eventually reports the new log level + // TODO re-enable inspect assertion after https://github.com/elastic/elastic-agent/issues/4870 is solved + //assert.Eventuallyf(t, func() bool { + // agentLogLevel, err := getLogLevelFromInspectOutput(ctx, f) + // if err != nil { + // t.Logf("error getting log level from agent: %v", err) + // return false + // } + // t.Logf("Agent log level: %q policy log level: %q", agentLogLevel, policyLogLevel) + // return agentLogLevel == policyLogLevel.String() + //}, 30*time.Second, time.Second, "agent never received expected log level %q", policyLogLevel) + + // assert Fleet eventually receives the new log level from agent through checkin + assert.Eventuallyf(t, func() bool { + fleetMetadataLogLevel, err := getLogLevelFromFleetMetadata(ctx, t, info.KibanaClient, agentID) + if err != nil { + t.Logf("error getting log level for agent %q from Fleet metadata: %v", agentID, err) + return false + } + t.Logf("Fleet metadata log level for agent %q: %q policy log level: %q", agentID, fleetMetadataLogLevel, policyLogLevel) + return fleetMetadataLogLevel == policyLogLevel.String() + }, 30*time.Second, time.Second, "agent never communicated policy log level %q to Fleet", policyLogLevel) + + // Step 2: set a different log level for the specific agent using Settings action + // set agent log level and verify that it takes precedence over the policy one + agentLogLevel := logp.DebugLevel.String() + + t.Logf("Setting agent log level to %q", agentLogLevel) + + err = updateAgentLogLevel(ctx, t, info.KibanaClient, agentID, agentLogLevel) + require.NoError(t, err, "error updating agent log level") + + // TODO re-enable inspect assertion after https://github.com/elastic/elastic-agent/issues/4870 is solved + //assert.Eventuallyf(t, func() bool { + // actualAgentLogLevel, err := getLogLevelFromInspectOutput(ctx, f) + // if err != nil { + // t.Logf("error getting log level from agent: %v", err) + // return false + // } + // t.Logf("Agent log level: %q, expected level: %q", actualAgentLogLevel, agentLogLevel) + // 
return actualAgentLogLevel == agentLogLevel + //}, 2*time.Minute, time.Second, "agent never received agent-specific log level %q", agentLogLevel) + + // assert Fleet eventually receives the new log level from agent through checkin + assert.Eventuallyf(t, func() bool { + fleetMetadataLogLevel, err := getLogLevelFromFleetMetadata(ctx, t, info.KibanaClient, agentID) + if err != nil { + t.Logf("error getting log level for agent %q from Fleet metadata: %v", agentID, err) + return false + } + t.Logf("Fleet metadata log level for agent %q: %q agent log level: %q", agentID, fleetMetadataLogLevel, agentLogLevel) + return fleetMetadataLogLevel == agentLogLevel + }, 30*time.Second, time.Second, "agent never communicated agent-specific log level %q to Fleet", agentLogLevel) + + // Step 3: Clear the agent-specific log level override, verify that we revert to policy log level + t.Logf("Clearing agent log level, expecting log level to revert back to %q", policyLogLevel) + err = updateAgentLogLevel(ctx, t, info.KibanaClient, agentID, "") + require.NoError(t, err, "error clearing agent log level") + + // assert `elastic-agent inspect` eventually reports the new log level + // TODO re-enable inspect assertion after https://github.com/elastic/elastic-agent/issues/4870 is solved + //assert.Eventuallyf(t, func() bool { + // actualAgentLogLevel, err := getLogLevelFromInspectOutput(ctx, f) + // if err != nil { + // t.Logf("error getting log level from agent: %v", err) + // return false + // } + // t.Logf("Agent log level: %q policy log level: %q", actualAgentLogLevel, policyLogLevel) + // return actualAgentLogLevel == policyLogLevel.String() + //}, 30*time.Second, time.Second, "agent never reverted to policy log level %q", policyLogLevel) + + // assert Fleet eventually receives the new log level from agent through checkin + assert.Eventuallyf(t, func() bool { + fleetMetadataLogLevel, err := getLogLevelFromFleetMetadata(ctx, t, info.KibanaClient, agentID) + if err != nil { + t.Logf("error getting log level for agent %q from Fleet metadata: %v", agentID, err) + return false + } + t.Logf("Fleet metadata log level for agent %q: %q policy log level: %q", agentID, fleetMetadataLogLevel, policyLogLevel) + return fleetMetadataLogLevel == policyLogLevel.String() + }, 30*time.Second, time.Second, "agent never communicated reverting to policy log level %q to Fleet", policyLogLevel) + + // Step 4: Clear the log level in policy and verify that agent reverts to the initial log level + t.Logf("Clearing policy log level, expecting log level to revert back to %q", initialLogLevel) + err = updatePolicyLogLevel(ctx, t, info.KibanaClient, policyResp.AgentPolicy, "") + require.NoError(t, err, "error clearing policy log level") + + // assert `elastic-agent inspect` eventually reports the initial log level + // TODO re-enable inspect assertion after https://github.com/elastic/elastic-agent/issues/4870 is solved + //assert.Eventuallyf(t, func() bool { + // actualAgentLogLevel, err := getLogLevelFromInspectOutput(ctx, f) + // if err != nil { + // t.Logf("error getting log level from agent: %v", err) + // return false + // } + // t.Logf("Agent log level: %q initial log level: %q", actualAgentLogLevel, initialLogLevel) + // return actualAgentLogLevel == initialLogLevel + //}, 2*time.Minute, time.Second, "agent never reverted to initial log level %q", initialLogLevel) + + // assert Fleet eventually receives the new log level from agent through checkin + assert.Eventuallyf(t, func() bool { + fleetMetadataLogLevel, err := 
getLogLevelFromFleetMetadata(ctx, t, info.KibanaClient, agentID)
+		if err != nil {
+			t.Logf("error getting log level for agent %q from Fleet metadata: %v", agentID, err)
+			return false
+		}
+		t.Logf("Fleet metadata log level for agent %q: %q initial log level: %q", agentID, fleetMetadataLogLevel, initialLogLevel)
+		return fleetMetadataLogLevel == initialLogLevel
+	}, 30*time.Second, time.Second, "agent never communicated initial log level %q to Fleet", initialLogLevel)
+}
+
+func waitForAgentAndFleetHealthy(ctx context.Context, t *testing.T, f *atesting.Fixture) bool {
+	status, err := f.ExecStatus(ctx)
+	if err != nil {
+		t.Logf("error fetching agent status: %v", err)
+		return false
+	}
+
+	statusBuffer := new(strings.Builder)
+	err = json.NewEncoder(statusBuffer).Encode(status)
+	if err != nil {
+		t.Logf("error marshaling agent status: %v", err)
+	} else {
+		t.Logf("agent status: %v", statusBuffer.String())
+	}
+
+	return status.State == int(cproto.State_HEALTHY) && status.FleetState == int(cproto.State_HEALTHY)
+}
+
+func updateAgentLogLevel(ctx context.Context, t *testing.T, kibanaClient *kibana.Client, agentID string, logLevel string) error {
+	updateLogLevelTemplateString := `{
+		"action": {
+			"type": "SETTINGS",
+			"data": {
+				"log_level": {{ .logLevel }}
+			}
+		}
+	}`
+	updateLogLevelTemplate, err := template.New("updateAgentLogLevel").Parse(updateLogLevelTemplateString)
+	if err != nil {
+		return fmt.Errorf("error parsing update log level request template: %w", err)
+	}
+
+	buf := new(bytes.Buffer)
+	templateData := map[string]string{}
+	if logLevel != "" {
+		templateData["logLevel"] = `"` + logLevel + `"`
+	} else {
+		templateData["logLevel"] = "null"
+	}
+
+	err = updateLogLevelTemplate.Execute(buf, templateData)
+	if err != nil {
+		return fmt.Errorf("error rendering update log level request template: %w", err)
+	}
+
+	t.Logf("Updating agent-specific log level to %q", logLevel)
+	_, err = kibanaClient.SendWithContext(ctx, http.MethodPost, "/api/fleet/agents/"+agentID+"/actions", nil, nil, buf)
+	if err != nil {
+		return fmt.Errorf("error executing fleet request: %w", err)
+	}
+
+	// The log below is a bit spammy but it can be useful for debugging
+	//respDump, err := httputil.DumpResponse(fleetResp, true)
+	//if err != nil {
+	//	t.Logf("Error dumping Fleet response to updating agent-specific log level: %v", err)
+	//} else {
+	//	t.Logf("Fleet response to updating agent-specific log level:\n----- BEGIN RESPONSE DUMP -----\n%s\n----- END RESPONSE DUMP -----\n", string(respDump))
+	//}
+
+	return nil
+}
+
+func updatePolicyLogLevel(ctx context.Context, t *testing.T, kibanaClient *kibana.Client, policy kibana.AgentPolicy, newPolicyLogLevel string) error {
+	// The request we need is the one below, but at the time of writing the Fleet API
+	// definition in elastic-agent-libs has no way to set overrides, so it needs updating:
+	// info.KibanaClient.UpdatePolicy(ctx, policyResp.ID, kibana.AgentPolicyUpdateRequest{})
+	// Until then, issue a generic HTTP request.
+
+	updateLogLevelTemplateString := `{
+		"name": "{{ .policyName }}",
+		"namespace": "{{ .namespace }}",
+		"advanced_settings": {
+			"agent_logging_level": {{ .logLevel }}
+		}
+	}`
+	updateLogLevelTemplate, err := template.New("updatePolicyLogLevel").Parse(updateLogLevelTemplateString)
+	if err != nil {
+		return fmt.Errorf("error parsing update log level request template: %w", err)
+	}
+
+	buf := new(bytes.Buffer)
+	templateData := map[string]string{"policyName": policy.Name, "namespace": policy.Namespace}
+	if newPolicyLogLevel == "" {
+		// to reset the log level we have to set it to null
+		templateData["logLevel"] = "null"
+	} else {
+		
templateData["logLevel"] = `"` + newPolicyLogLevel + `"` + } + + err = updateLogLevelTemplate.Execute(buf, templateData) + if err != nil { + return fmt.Errorf("error rendering policy update template: %w", err) + } + + _, err = kibanaClient.SendWithContext(ctx, http.MethodPut, "/api/fleet/agent_policies/"+policy.ID, nil, nil, buf) + + if err != nil { + return fmt.Errorf("error executing fleet request: %w", err) + } + + // The log below is a bit spammy but it can be useful for debugging + //respDump, err := httputil.DumpResponse(fleetResp, true) + //if err != nil { + // t.Logf("Error dumping Fleet response to updating policy log level: %v", err) + //} else { + // t.Logf("Fleet response to updating policy log level:\n----- BEGIN RESPONSE DUMP -----\n%s\n----- END RESPONSE DUMP -----\n", string(respDump)) + //} + + return nil +} + +func getAgentID(ctx context.Context, f *atesting.Fixture) (string, error) { + agentInspectOutput, err := f.ExecInspect(ctx) + if err != nil { + return "", fmt.Errorf("executing elastic-agent inspect: %w", err) + } + + return agentInspectOutput.Agent.ID, nil +} + +func getLogLevelFromInspectOutput(ctx context.Context, f *atesting.Fixture) (string, error) { + agentInspectOutput, err := f.ExecInspect(ctx) + if err != nil { + return "", fmt.Errorf("executing elastic-agent inspect: %w", err) + } + + return agentInspectOutput.Agent.Logging.Level, nil +} + +func getLogLevelFromFleetMetadata(ctx context.Context, t *testing.T, kibanaClient *kibana.Client, agentID string) (string, error) { + // The request we would need is kibanaClient.GetAgent(), but at the time of writing there is no way to get loglevel with fleet api definition in elastic-agent-libs, need to update + // kibana.AgentCommon struct to pick up log level from `local_metadata` + // Let's do a generic HTTP request + + response, err := kibanaClient.SendWithContext(ctx, http.MethodGet, "/api/fleet/agents/"+agentID, nil, nil, nil) + if err != nil { + return "", fmt.Errorf("getting agent from Fleet: %w", err) + } + defer response.Body.Close() + + // The log below is a bit spammy but it can be useful for debugging + //dumpResponse, err := httputil.DumpResponse(response, true) + //if err != nil { + // t.Logf(" error dumping agent metadata fleet response: %v", err) + //} else { + // t.Logf("agent metadata fleet response:\n----- BEGIN RESPONSE DUMP -----\n%s\n----- END RESPONSE DUMP -----", dumpResponse) + //} + + responseBodyBytes, err := io.ReadAll(response.Body) + if err != nil { + return "", fmt.Errorf("reading response body from Fleet: %w", err) + } + + rawJson := map[string]any{} + err = json.Unmarshal(responseBodyBytes, &rawJson) + if err != nil { + return "", fmt.Errorf("unmarshalling Fleet response: %w", err) + } + rawLogLevel, err := utils.GetNestedMap(rawJson, "item", "local_metadata", "elastic", "agent", "log_level") + if err != nil { + return "", fmt.Errorf("looking for item/local_metadata/elastic/agent/log_level key in Fleet response: %w", err) + } + + if logLevel, ok := rawLogLevel.(string); ok { + return logLevel, nil + } + return "", fmt.Errorf("loglevel from Fleet output is not a string: %T", rawLogLevel) +} + +func createPolicyAndEnrollmentToken(ctx context.Context, t *testing.T, kibClient *kibana.Client, policy kibana.AgentPolicy) (kibana.PolicyResponse, kibana.CreateEnrollmentAPIKeyResponse) { + t.Log("Creating Agent policy...") + policyResp, err := kibClient.CreatePolicy(ctx, policy) + require.NoError(t, err, "failed creating policy") + + t.Log("Creating Agent enrollment API key...") + 
createEnrollmentApiKeyReq := kibana.CreateEnrollmentAPIKeyRequest{
+		PolicyID: policyResp.ID,
+	}
+	enrollmentToken, err := kibClient.CreateEnrollmentAPIKey(ctx, createEnrollmentApiKeyReq)
+	require.NoError(t, err, "failed creating enrollment API key")
+	return policyResp, enrollmentToken
+}
+
+func createBasicPolicy() kibana.AgentPolicy {
+	policyUUID := uuid.Must(uuid.NewV4()).String()
+	return kibana.AgentPolicy{
+		Name:              "testloglevel-policy-" + policyUUID,
+		Namespace:         "default",
+		Description:       "Test Log Level Policy " + policyUUID,
+		MonitoringEnabled: []kibana.MonitoringEnabledOption{},
+	}
+}
diff --git a/x-pack/agentbeat/testing/integration/logs_ingestion_test.go b/x-pack/agentbeat/testing/integration/logs_ingestion_test.go
new file mode 100644
index 000000000000..56a2fcf2e29a
--- /dev/null
+++ b/x-pack/agentbeat/testing/integration/logs_ingestion_test.go
@@ -0,0 +1,552 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License 2.0;
+// you may not use this file except in compliance with the Elastic License 2.0.
+
+//go:build integration
+
+package integration
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"net/http/httputil"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strings"
+	"testing"
+	"text/template"
+	"time"
+
+	"github.com/gofrs/uuid/v5"
+	"github.com/hectane/go-acl"
+
+	"github.com/elastic/elastic-agent-libs/kibana"
+	"github.com/elastic/elastic-agent/pkg/control/v2/client"
+	"github.com/elastic/elastic-agent/pkg/control/v2/cproto"
+	atesting "github.com/elastic/elastic-agent/pkg/testing"
+	"github.com/elastic/elastic-agent/pkg/testing/define"
+	"github.com/elastic/elastic-agent/pkg/testing/tools"
+	"github.com/elastic/elastic-agent/pkg/testing/tools/check"
+	"github.com/elastic/elastic-agent/pkg/testing/tools/estools"
+	"github.com/elastic/elastic-agent/pkg/testing/tools/fleettools"
+	"github.com/elastic/elastic-agent/pkg/testing/tools/testcontext"
+	"github.com/elastic/elastic-agent/testing/installtest"
+	"github.com/elastic/elastic-transport-go/v8/elastictransport"
+
+	"github.com/rcrowley/go-metrics"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	mockes "github.com/elastic/mock-es/pkg/api"
+)
+
+func TestLogIngestionFleetManaged(t *testing.T) {
+	info := define.Require(t, define.Requirements{
+		Group: Fleet,
+		Stack: &define.Stack{},
+		Local: false,
+		Sudo:  true,
+	})
+
+	ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute))
+	defer cancel()
+
+	agentFixture, err := define.NewFixtureFromLocalBuild(t, define.Version())
+	require.NoError(t, err)
+
+	// 1. Create a policy in Fleet with monitoring enabled.
+	// To ensure there are no conflicts with previous test runs against
+	// the same ESS stack, we append a random UUID to the policy name.
+	// This policy does not contain any integration.
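+	// The overall flow: create the policy, install and enroll the agent into it,
+	// then assert on the monitoring data the agent ships to Elasticsearch.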
+ t.Log("Enrolling agent in Fleet with a test policy") + createPolicyReq := kibana.AgentPolicy{ + Name: fmt.Sprintf("test-policy-enroll-%s", uuid.Must(uuid.NewV4()).String()), + Namespace: info.Namespace, + Description: "test policy for agent enrollment", + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + AgentFeatures: []map[string]interface{}{ + { + "name": "test_enroll", + "enabled": true, + }, + }, + Overrides: map[string]interface{}{ + "agent": map[string]interface{}{ + "monitoring": map[string]interface{}{ + "metrics_period": "1s", + }, + }, + }, + } + + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Force: true, + } + + // 2. Install the Elastic-Agent with the policy that + // was just created. + policy, err := tools.InstallAgentWithPolicy( + ctx, + t, + installOpts, + agentFixture, + info.KibanaClient, + createPolicyReq) + require.NoError(t, err) + t.Logf("created policy: %s", policy.ID) + check.ConnectedToFleet(ctx, t, agentFixture, 5*time.Minute) + + // 3. Ensure installation is correct. + require.NoError(t, installtest.CheckSuccess(ctx, agentFixture, installOpts.BasePath, &installtest.CheckOpts{Privileged: installOpts.Privileged})) + + // 4. Ensure healthy state at startup + checkHealthAtStartup(t, ctx, agentFixture) + + t.Run("Monitoring logs are shipped", func(t *testing.T) { + testMonitoringLogsAreShipped(t, ctx, info, agentFixture, policy) + }) + + t.Run("Normal logs with flattened data_stream are shipped", func(t *testing.T) { + testFlattenedDatastreamFleetPolicy(t, ctx, info, policy) + }) +} + +func startMockES(t *testing.T) string { + registry := metrics.NewRegistry() + uid := uuid.Must(uuid.NewV4()) + clusterUUID := uuid.Must(uuid.NewV4()).String() + + mux := http.NewServeMux() + mux.Handle("/", mockes.NewAPIHandler( + uid, + clusterUUID, + registry, + time.Now().Add(time.Hour), 0, 0, 0, 100, 0)) + + s := httptest.NewServer(mux) + t.Cleanup(s.Close) + + return s.URL +} + +// checkHealthAtStartup ensures all the beats and agent are healthy and working before we continue +func checkHealthAtStartup(t *testing.T, ctx context.Context, agentFixture *atesting.Fixture) { + // because we need to separately fetch the PIDs, wait until everything is healthy before we look for running beats + compDebugName := "" + require.Eventually(t, func() bool { + allHealthy := true + status, err := agentFixture.ExecStatus(ctx) + if err != nil { + t.Logf("agent status returned an error: %v", err) + return false + } + t.Logf("Received agent status:\n%+v\n", status) // this can be re-marshaled to JSON if we prefer that notation + for _, comp := range status.Components { + // make sure the components include the expected integrations + for _, v := range comp.Units { + if v.State != int(cproto.State_HEALTHY) { + allHealthy = false + } + } + if comp.State != int(cproto.State_HEALTHY) { + compDebugName = comp.Name + allHealthy = false + } + } + return allHealthy + }, 3*time.Minute, 3*time.Second, "install never became healthy: components did not return a healthy state: %s", compDebugName) +} + +func testMonitoringLogsAreShipped( + t *testing.T, + ctx context.Context, + info *define.Info, + agentFixture *atesting.Fixture, + policy kibana.PolicyResponse, +) { + // Stage 1: Make sure metricbeat logs are populated + t.Log("Making sure metricbeat logs are populated") + docs := findESDocs(t, func() (estools.Documents, error) { + return estools.GetLogsForDataset(ctx, info.ESClient, "elastic_agent.metricbeat") + }) + 
t.Logf("metricbeat: Got %d documents", len(docs.Hits.Hits)) + require.NotZero(t, len(docs.Hits.Hits), + "Looking for logs in dataset 'elastic_agent.metricbeat'") + + // Stage 2: make sure all components are healthy + t.Log("Making sure all components are healthy") + status, err := agentFixture.ExecStatus(ctx) + require.NoError(t, err, + "could not get agent status to verify all components are healthy") + for _, c := range status.Components { + assert.Equalf(t, client.Healthy, client.State(c.State), + "component %s: want %s, got %s", + c.Name, client.Healthy, client.State(c.State)) + } + + // Stage 3: Make sure there are no errors in logs + t.Log("Making sure there are no error logs") + docs = queryESDocs(t, func() (estools.Documents, error) { + return estools.CheckForErrorsInLogs(ctx, info.ESClient, info.Namespace, []string{ + // acceptable error messages (include reason) + "Error dialing dial tcp 127.0.0.1:9200: connect: connection refused", // beat is running default config before its config gets updated + "Failed to apply initial policy from on disk configuration", + "Failed to connect to backoff(elasticsearch(http://127.0.0.1:9200)): Get \"http://127.0.0.1:9200\": dial tcp 127.0.0.1:9200: connect: connection refused", // Deb test + "Failed to download artifact", + "Failed to initialize artifact", + "Global configuration artifact is not available", // Endpoint: failed to load user artifact due to connectivity issues + "add_cloud_metadata: received error failed fetching EC2 Identity Document", // okay for the cloud metadata to not work + "add_cloud_metadata: received error failed requesting openstack metadata", // okay for the cloud metadata to not work + "add_cloud_metadata: received error failed with http status code 404", // okay for the cloud metadata to not work + "elastic-agent-client error: rpc error: code = Canceled desc = context canceled", // can happen on restart + "failed to invoke rollback watcher: failed to start Upgrade Watcher", // on debian this happens probably need to fix. 
+ "falling back to IMDSv1: operation error ec2imds: getToken", // okay for the cloud metadata to not work + }) + }) + t.Logf("error logs: Got %d documents", len(docs.Hits.Hits)) + messages := make([]string, 0, len(docs.Hits.Hits)) + for _, doc := range docs.Hits.Hits { + t.Logf("%#v", doc.Source) + message, ok := doc.Source["message"] + if !ok { + continue + } + messageStr, ok := message.(string) + if !ok { + continue + } + messages = append(messages, messageStr) + } + require.Emptyf(t, docs.Hits.Hits, "list of error messages is expected to be empty, found:\n%s", strings.Join(messages, ", \n")) + + // Stage 3: Make sure we have message confirming central management is running + t.Log("Making sure we have message confirming central management is running") + docs = findESDocs(t, func() (estools.Documents, error) { + return estools.FindMatchingLogLines(ctx, info.ESClient, info.Namespace, + "Parsed configuration and determined agent is managed by Fleet") + }) + require.NotZero(t, len(docs.Hits.Hits)) + + // Stage 4: verify logs from the monitoring components are not sent to the output + t.Log("Check monitoring logs") + hostname, err := os.Hostname() + if err != nil { + t.Fatalf("could not get hostname to filter Agent: %s", err) + } + + agentID, err := fleettools.GetAgentIDByHostname(ctx, info.KibanaClient, policy.ID, hostname) + require.NoError(t, err, "could not get Agent ID by hostname") + t.Logf("Agent ID: %q", agentID) + + // We cannot search for `component.id` because at the moment of writing + // this field is not mapped. There is an issue for that: + // https://github.com/elastic/integrations/issues/6545 + // TODO: use runtime fields while the above issue is not resolved. + docs = findESDocs(t, func() (estools.Documents, error) { + return estools.GetLogsForAgentID(ctx, info.ESClient, agentID) + }) + require.NoError(t, err, "could not get logs from Agent ID: %q, err: %s", + agentID, err) + + monRegExp := regexp.MustCompile(".*-monitoring$") + for i, d := range docs.Hits.Hits { + // Lazy way to navigate a map[string]any: convert to JSON then + // decode into a struct. + jsonData, err := json.Marshal(d.Source) + if err != nil { + t.Fatalf("could not encode document source as JSON: %s", err) + } + + doc := ESDocument{} + if err := json.Unmarshal(jsonData, &doc); err != nil { + t.Fatalf("could not unmarshal document source: %s", err) + } + + if monRegExp.MatchString(doc.Component.ID) { + t.Errorf("[%d] Document on index %q with 'component.id': %q "+ + "and 'elastic_agent.id': %q. 'elastic_agent.id' must not "+ + "end in '-monitoring'\n", + i, d.Index, doc.Component.ID, doc.ElasticAgent.ID) + } + } +} + +// queryESDocs runs `findFn` until it returns no error. Zero documents returned +// is considered a success. +func queryESDocs(t *testing.T, findFn func() (estools.Documents, error)) estools.Documents { + var docs estools.Documents + require.Eventually( + t, + func() bool { + var err error + docs, err = findFn() + if err != nil { + t.Logf("got an error querying ES, retrying. Error: %s", err) + } + return err == nil + }, + 3*time.Minute, + 15*time.Second, + ) + + return docs +} + +// findESDocs runs `findFn` until at least one document is returned and there is no error +func findESDocs(t *testing.T, findFn func() (estools.Documents, error)) estools.Documents { + var docs estools.Documents + require.Eventually( + t, + func() bool { + var err error + docs, err = findFn() + if err != nil { + t.Logf("got an error querying ES, retrying. 
Error: %s", err) + return false + } + + return docs.Hits.Total.Value != 0 + }, + 3*time.Minute, + 15*time.Second, + ) + + return docs +} + +func testFlattenedDatastreamFleetPolicy( + t *testing.T, + ctx context.Context, + info *define.Info, + policy kibana.PolicyResponse, +) { + dsType := "logs" + id := uuid.Must(uuid.NewV4()).String() + dsNamespace := cleanString(fmt.Sprintf("namespace-%s", id)) + dsDataset := cleanString(fmt.Sprintf("dataset-%s", id)) + numEvents := 60 + + // tempDir is not deleted to help with debugging issues + // useful to check permissions on contents + tempDir, err := os.MkdirTemp("", "fleet-ingest-*") + if err != nil { + t.Fatalf("failed to create temp directory: %s", err) + } + err = acl.Chmod(tempDir, 0o755) // `acl.Chmod` is used to ensure unprivileged mode on Windows works + if err != nil { + t.Fatalf("failed to chmod temp directory %s: %s", tempDir, err) + } + logFilePath := filepath.Join(tempDir, "log.log") + generateLogFile(t, logFilePath, 2*time.Millisecond, numEvents) + + // 1. Prepare a request to add an integration to the policy + tmpl, err := template.New(t.Name() + "custom-log-policy").Parse(policyJSON) + if err != nil { + t.Fatalf("cannot parse template: %s", err) + } + + // The time here ensures there are no conflicts with the integration name + // in Fleet. + agentPolicyBuilder := strings.Builder{} + err = tmpl.Execute(&agentPolicyBuilder, policyVars{ + Name: "Log-Input-" + t.Name() + "-" + time.Now().Format(time.RFC3339), + PolicyID: policy.ID, + LogFilePath: logFilePath, + Namespace: dsNamespace, + Dataset: dsDataset, + }) + if err != nil { + t.Fatalf("could not render template: %s", err) + } + // We keep a copy of the policy for debugging prurposes + agentPolicy := agentPolicyBuilder.String() + + // 2. Call Kibana to create the policy. + // Docs: https://www.elastic.co/guide/en/fleet/current/fleet-api-docs.html#create-integration-policy-api + resp, err := info.KibanaClient.Connection.Send( + http.MethodPost, + "/api/fleet/package_policies", + nil, + nil, + bytes.NewBufferString(agentPolicy)) + if err != nil { + t.Fatalf("could not execute request to Kibana/Fleet: %s", err) + } + if resp.StatusCode != http.StatusOK { + // On error dump the whole request response so we can easily spot + // what went wrong. + t.Errorf("received a non 200-OK when adding package to policy. 
"+ + "Status code: %d", resp.StatusCode) + respDump, err := httputil.DumpResponse(resp, true) + if err != nil { + t.Fatalf("could not dump error response from Kibana: %s", err) + } + // Make debugging as easy as possible + t.Log("================================================================================") + t.Log("Kibana error response:") + t.Log(string(respDump)) + t.Log("================================================================================") + t.Log("Rendered policy:") + t.Log(agentPolicy) + t.Log("================================================================================") + t.FailNow() + } + + require.Eventually( + t, + ensureDocumentsInES(t, ctx, info.ESClient, dsType, dsDataset, dsNamespace, numEvents), + 120*time.Second, + time.Second, + "could not get all expected documents form ES") +} + +// ensureDocumentsInES asserts the documents were ingested into the correct +// datastream +func ensureDocumentsInES( + t *testing.T, + ctx context.Context, + esClient elastictransport.Interface, + dsType, dsDataset, dsNamespace string, + numEvents int, +) func() bool { + + f := func() bool { + t.Helper() + + docs, err := estools.GetLogsForDatastream(ctx, esClient, dsType, dsDataset, dsNamespace) + if err != nil { + t.Logf("error quering ES, will retry later: %s", err) + } + + if docs.Hits.Total.Value == numEvents { + return true + } + + return false + + } + + return f +} + +// generateLogFile generates a log file by appending new lines every tick +// the lines are composed by the test name and the current time in RFC3339Nano +// This function spans a new goroutine and does not block +func generateLogFile(t *testing.T, fullPath string, tick time.Duration, events int) { + t.Helper() + f, err := os.Create(fullPath) + if err != nil { + t.Fatalf("could not create file '%s': %s", fullPath, err) + } + err = acl.Chmod(fullPath, 0o644) // `acl.Chmod` is used to ensure unprivileged mode on Windows works + if err != nil { + t.Fatalf("failed to chmod file '%s': %s", fullPath, err) + } + + go func() { + t.Helper() + ticker := time.NewTicker(tick) + t.Cleanup(ticker.Stop) + + done := make(chan struct{}) + t.Cleanup(func() { close(done) }) + + defer func() { + if err := f.Close(); err != nil { + t.Errorf("could not close log file '%s': %s", fullPath, err) + } + }() + + i := 0 + for { + select { + case <-done: + return + case now := <-ticker.C: + i++ + _, err := fmt.Fprintln(f, t.Name(), "Iteration: ", i, now.Format(time.RFC3339Nano)) + if err != nil { + // The Go compiler does not allow me to call t.Fatalf from a non-test + // goroutine, t.Errorf is our only option + t.Errorf("could not write data to log file '%s': %s", fullPath, err) + return + } + // make sure log lines are synced as quickly as possible + if err := f.Sync(); err != nil { + t.Errorf("could not sync file '%s': %s", fullPath, err) + } + if i == events { + return + } + } + } + }() +} + +func cleanString(s string) string { + return nonAlphanumericRegex.ReplaceAllString(strings.ToLower(s), "") +} + +var nonAlphanumericRegex = regexp.MustCompile(`[^a-zA-Z0-9 ]+`) + +var policyJSON = ` +{ + "policy_id": "{{.PolicyID}}", + "package": { + "name": "log", + "version": "2.3.0" + }, + "name": "{{.Name}}", + "namespace": "{{.Namespace}}", + "inputs": { + "logs-logfile": { + "enabled": true, + "streams": { + "log.logs": { + "enabled": true, + "vars": { + "paths": [ + "{{.LogFilePath | js}}" {{/* we need to escape windows paths */}} + ], + "data_stream.dataset": "{{.Dataset}}" + } + } + } + } + } +}` + +type policyVars struct { + Name 
string + PolicyID string + LogFilePath string + Namespace string + Dataset string +} + +type ESDocument struct { + ElasticAgent ElasticAgent `json:"elastic_agent"` + Component Component `json:"component"` + Host Host `json:"host"` +} +type ElasticAgent struct { + ID string `json:"id"` + Version string `json:"version"` + Snapshot bool `json:"snapshot"` +} +type Component struct { + Binary string `json:"binary"` + ID string `json:"id"` +} +type Host struct { + Hostname string `json:"hostname"` +} diff --git a/x-pack/agentbeat/testing/integration/metrics_monitoring_test.go b/x-pack/agentbeat/testing/integration/metrics_monitoring_test.go new file mode 100644 index 000000000000..08eda658ce61 --- /dev/null +++ b/x-pack/agentbeat/testing/integration/metrics_monitoring_test.go @@ -0,0 +1,146 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +//go:build integration + +package integration + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/gofrs/uuid/v5" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/elastic/elastic-agent-libs/kibana" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools" + "github.com/elastic/elastic-agent/pkg/testing/tools/estools" +) + +type MetricsRunner struct { + suite.Suite + info *define.Info + agentFixture *atesting.Fixture + + ESHost string +} + +func TestMetricsMonitoringCorrectBinaries(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Fleet, + Stack: &define.Stack{}, + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + OS: []define.OS{ + {Type: define.Linux}, + {Type: define.Windows}, + }, + }) + + suite.Run(t, &MetricsRunner{info: info}) +} + +func (runner *MetricsRunner) SetupSuite() { + fixture, err := define.NewFixtureFromLocalBuild(runner.T(), define.Version()) + require.NoError(runner.T(), err) + runner.agentFixture = fixture + + policyUUID := uuid.Must(uuid.NewV4()).String() + basePolicy := kibana.AgentPolicy{ + Name: "test-policy-" + policyUUID, + Namespace: "default", + Description: "Test policy " + policyUUID, + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + } + + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Force: true, + Privileged: true, + } + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) + defer cancel() + + policyResp, err := tools.InstallAgentWithPolicy(ctx, runner.T(), installOpts, runner.agentFixture, runner.info.KibanaClient, basePolicy) + require.NoError(runner.T(), err) + + _, err = tools.InstallPackageFromDefaultFile(ctx, runner.info.KibanaClient, "system", "1.53.1", "system_integration_setup.json", uuid.Must(uuid.NewV4()).String(), policyResp.ID) + require.NoError(runner.T(), err) + +} + +func (runner *MetricsRunner) TestBeatsMetrics() { + UnitOutputName := "default" + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*20) + defer cancel() + agentStatus, err := runner.agentFixture.ExecStatus(ctx) + require.NoError(runner.T(), err) + + componentIds := []string{ + fmt.Sprintf("system/metrics-%s", UnitOutputName), + fmt.Sprintf("log-%s", 
UnitOutputName), + "beat/metrics-monitoring", + "elastic-agent", + "http/metrics-monitoring", + "filestream-monitoring", + } + + require.Eventually(runner.T(), func() bool { + for _, cid := range componentIds { + query := genESQuery(agentStatus.Info.ID, cid) + res, err := estools.PerformQueryForRawQuery(ctx, query, "metrics-elastic_agent*", runner.info.ESClient) + require.NoError(runner.T(), err) + runner.T().Logf("Fetched metrics for %s, got %d hits", cid, res.Hits.Total.Value) + if res.Hits.Total.Value < 1 { + return false + } + + } + return true + }, time.Minute*10, time.Second*10, "could not fetch metrics for all known beats in default install: %v", componentIds) +} + +func genESQuery(agentID string, componentID string) map[string]interface{} { + // see https://github.com/elastic/kibana/blob/main/x-pack/plugins/fleet/server/services/agents/agent_metrics.ts + queryRaw := map[string]interface{}{ + "query": map[string]interface{}{ + "bool": map[string]interface{}{ + "must": []map[string]interface{}{ + { + "match": map[string]interface{}{ + "agent.id": agentID, + }, + }, + { + "match": map[string]interface{}{ + "component.id": componentID, + }, + }, + // make sure we fetch documents that have the metric field used by fleet monitoring + { + "exists": map[string]interface{}{ + "field": "system.process.cpu.total.value", + }, + }, + { + "exists": map[string]interface{}{ + "field": "system.process.memory.size", + }, + }, + }, + }, + }, + } + + return queryRaw +} diff --git a/x-pack/agentbeat/testing/integration/monitoring_endpoint_test.go b/x-pack/agentbeat/testing/integration/monitoring_endpoint_test.go new file mode 100644 index 000000000000..626e250e0a7b --- /dev/null +++ b/x-pack/agentbeat/testing/integration/monitoring_endpoint_test.go @@ -0,0 +1,266 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +//go:build integration + +package integration + +import ( + "context" + "os/exec" + "runtime" + "testing" + "time" + + "github.com/gofrs/uuid/v5" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/elastic/elastic-agent-libs/kibana" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools" + "github.com/elastic/elastic-agent/pkg/testing/tools/estools" + "github.com/elastic/elastic-agent/pkg/testing/tools/testcontext" +) + +type EndpointMetricsMonRunner struct { + suite.Suite + info *define.Info + fixture *atesting.Fixture + endpointID string +} + +func TestEndpointAgentServiceMonitoring(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Fleet, + Stack: &define.Stack{}, + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + OS: []define.OS{ + {Type: define.Linux}, + }, + }) + + // Get path to agent executable. 
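+	// (NewFixtureFromLocalBuild points the suite at the locally built agent
+	// binary rather than a released artifact.)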
+	fixture, err := define.NewFixtureFromLocalBuild(t, define.Version())
+	require.NoError(t, err, "could not create agent fixture")
+
+	runner := &EndpointMetricsMonRunner{
+		info:       info,
+		fixture:    fixture,
+		endpointID: "endpoint-default",
+	}
+
+	suite.Run(t, runner)
+}
+
+func (runner *EndpointMetricsMonRunner) SetupSuite() {
+	deadline := time.Now().Add(10 * time.Minute)
+	ctx, cancel := testcontext.WithDeadline(runner.T(), context.Background(), deadline)
+	defer cancel()
+
+	runner.T().Log("Enrolling the agent in Fleet")
+	policyUUID := uuid.Must(uuid.NewV4()).String()
+
+	createPolicyReq := kibana.AgentPolicy{
+		Name:        "test-policy-" + policyUUID,
+		Namespace:   "default",
+		Description: "Test policy " + policyUUID,
+		MonitoringEnabled: []kibana.MonitoringEnabledOption{
+			kibana.MonitoringEnabledLogs,
+			kibana.MonitoringEnabledMetrics,
+		},
+	}
+
+	installOpts := atesting.InstallOpts{
+		NonInteractive: true,
+		Force:          true,
+		Privileged:     true,
+	}
+
+	policy, err := tools.InstallAgentWithPolicy(ctx, runner.T(),
+		installOpts, runner.fixture, runner.info.KibanaClient, createPolicyReq)
+	require.NoError(runner.T(), err, "failed to install agent with policy")
+
+	runner.T().Log("Installing Elastic Defend")
+	pkgPolicyResp, err := installElasticDefendPackage(runner.T(), runner.info, policy.ID)
+	require.NoErrorf(runner.T(), err, "Policy Response was: %v", pkgPolicyResp)
+
+	runner.T().Log("Polling for endpoint-security to become Healthy")
+	ctx, cancel = context.WithTimeout(ctx, time.Minute*3)
+	defer cancel()
+
+	agentClient := runner.fixture.Client()
+	err = agentClient.Connect(ctx)
+	require.NoError(runner.T(), err, "could not connect to local agent")
+
+	require.Eventually(runner.T(),
+		func() bool { return agentAndEndpointAreHealthy(runner.T(), ctx, agentClient) },
+		time.Minute*3,
+		time.Second,
+		"Endpoint component or units are not healthy.",
+	)
+}
+
+func (runner *EndpointMetricsMonRunner) TestEndpointMetrics() {
+	ctx, cancel := context.WithTimeout(context.Background(), time.Minute*15)
+	defer cancel()
+
+	agentStatus, err := runner.fixture.ExecStatus(ctx)
+	require.NoError(runner.T(), err)
+
+	require.Eventually(runner.T(), func() bool {
+		query := genESQueryByBinary(agentStatus.Info.ID, runner.endpointID)
+		res, err := estools.PerformQueryForRawQuery(ctx, query, "metrics-elastic_agent*", runner.info.ESClient)
+		require.NoError(runner.T(), err)
+		runner.T().Logf("Fetched metrics for %s, got %d hits", runner.endpointID, res.Hits.Total.Value)
+		return res.Hits.Total.Value >= 1
+	}, time.Minute*10, time.Second*10, "could not fetch component metricsets for endpoint with ID %s and agent ID %s", runner.endpointID, agentStatus.Info.ID)
+}
+
+func (runner *EndpointMetricsMonRunner) TestEndpointMetricsAfterRestart() {
+	ctx, cancel := context.WithTimeout(context.Background(), time.Minute*15)
+	defer cancel()
+	// Once we've gotten the first round of metrics, forcibly restart endpoint and
+	// see if we still get metrics. This makes sure that the backend coordinator
+	// can deal with properly updating the metrics handlers if there are
+	// unexpected state changes.
+
+	// confine this to linux; the behavior is platform-agnostic, and this way we have `pgrep`
+	if runtime.GOOS != "linux" {
+		return
+	}
+
+	// kill endpoint
+	cmd := exec.Command("pgrep", "-f", "endpoint")
+	pgrep, err := cmd.CombinedOutput()
+	require.NoError(runner.T(), err, "pgrep output: %s", string(pgrep))
+	runner.T().Logf("killing pid: %s", string(pgrep))
+
+	cmd = exec.Command("pkill", "--signal", "SIGKILL", "-f", "endpoint")
+	_, err = cmd.CombinedOutput()
+	require.NoError(runner.T(), err)
+
+	// wait for endpoint to come back up. We use `pgrep`
+	// since the agent health status won't immediately register that the endpoint process itself is gone.
+	require.Eventually(runner.T(), func() bool {
+		cmd := exec.Command("pgrep", "-f", "endpoint")
+		pgrep, err := cmd.CombinedOutput()
+		runner.T().Logf("found pid: %s", string(pgrep))
+		return err == nil
+	}, time.Minute*2, time.Second)
+
+	// make sure agent still says we're healthy
+	agentClient := runner.fixture.Client()
+	err = agentClient.Connect(ctx)
+	require.NoError(runner.T(), err, "could not connect to local agent")
+
+	require.Eventually(runner.T(),
+		func() bool { return agentAndEndpointAreHealthy(runner.T(), ctx, agentClient) },
+		time.Minute*3,
+		time.Second,
+		"Endpoint component or units are not healthy.",
+	)
+
+	// catch the time endpoint is restarted, so we can filter for documents after a given time
+	endpointRestarted := time.Now()
+
+	agentStatus, err := runner.fixture.ExecStatus(ctx)
+	require.NoError(runner.T(), err)
+
+	// now query again, but make sure we're getting new metrics
+	require.Eventually(runner.T(), func() bool {
+		query := genESQueryByDate(agentStatus.Info.ID, runner.endpointID, endpointRestarted.Format(time.RFC3339))
+		res, err := estools.PerformQueryForRawQuery(ctx, query, "metrics-elastic_agent*", runner.info.ESClient)
+		require.NoError(runner.T(), err)
+		runner.T().Logf("Fetched metrics for %s, got %d hits", runner.endpointID, res.Hits.Total.Value)
+		return res.Hits.Total.Value >= 1
+	}, time.Minute*10, time.Second*10, "could not fetch component metricsets for endpoint with ID %s and agent ID %s", runner.endpointID, agentStatus.Info.ID)
+}
+
+func genESQueryByDate(agentID string, componentID string, dateAfter string) map[string]interface{} {
+	queryRaw := map[string]interface{}{
+		"query": map[string]interface{}{
+			"bool": map[string]interface{}{
+				"must": []map[string]interface{}{
+					{
+						"match": map[string]interface{}{
+							"agent.id": agentID,
+						},
+					},
+					{
+						"match": map[string]interface{}{
+							"component.id": componentID,
+						},
+					},
+					{
+						"range": map[string]interface{}{
+							"@timestamp": map[string]interface{}{
+								"gte": dateAfter,
+							},
+						},
+					},
+					{
+						"range": map[string]interface{}{
+							"system.process.cpu.total.value": map[string]interface{}{
+								"gt": 0,
+							},
+						},
+					},
+					{
+						"range": map[string]interface{}{
+							"system.process.memory.size": map[string]interface{}{
+								"gt": 0,
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	return queryRaw
+}
+
+func genESQueryByBinary(agentID string, componentID string) map[string]interface{} {
+	// see https://github.com/elastic/kibana/blob/main/x-pack/plugins/fleet/server/services/agents/agent_metrics.ts
+	queryRaw := map[string]interface{}{
+		"query": map[string]interface{}{
+			"bool": map[string]interface{}{
+				"must": []map[string]interface{}{
+					{
+						"match": map[string]interface{}{
+							"agent.id": agentID,
+						},
+					},
+					{
+						"match": map[string]interface{}{
+							"component.id": componentID,
+						},
+					},
+					{
+						"range": map[string]interface{}{
+							"system.process.cpu.total.value": map[string]interface{}{
+								"gt": 0,
+							},
+						},
+					},
+					{
+						"range": map[string]interface{}{
+							"system.process.memory.size": map[string]interface{}{
+								"gt": 0,
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	return queryRaw
+}
diff --git a/x-pack/agentbeat/testing/integration/monitoring_probe_preserve_text_cfg_test.go b/x-pack/agentbeat/testing/integration/monitoring_probe_preserve_text_cfg_test.go
new file mode 100644
index 000000000000..79f6b1bdb6ac
--- /dev/null
+++ 
b/x-pack/agentbeat/testing/integration/monitoring_probe_preserve_text_cfg_test.go @@ -0,0 +1,206 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +//go:build integration + +package integration + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "testing" + "time" + + "github.com/gofrs/uuid/v5" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/elastic/elastic-agent-libs/kibana" + "github.com/elastic/elastic-agent/pkg/control/v2/cproto" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools" +) + +var defaultTextCfg = ` +outputs: + default: + type: elasticsearch + hosts: [127.0.0.1:9200] + api_key: "example-key" + preset: balanced + allow_older_versions: true + +inputs: + - type: system/metrics + id: unique-system-metrics-input + data_stream.namespace: default + use_output: default + streams: + - metricsets: + - cpu + data_stream.dataset: system.cpu + - metricsets: + - memory + data_stream.dataset: system.memory + - metricsets: + - network + data_stream.dataset: system.network + - metricsets: + - filesystem + data_stream.dataset: system.filesystem +agent.monitoring: + metrics_period: 1s + http: + enabled: true + port: 6791 +agent.logging.level: debug +agent.logging.to_stderr: true +` + +type MonitoringTextRunner struct { + suite.Suite + info *define.Info + agentFixture *atesting.Fixture + + ESHost string + + healthCheckTime time.Duration + healthCheckRefreshTime time.Duration + + policyID string + policyName string +} + +func TestMonitoringPreserveTextConfig(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: "fleet", + Stack: &define.Stack{}, + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + OS: []define.OS{ + {Type: define.Linux}, + {Type: define.Windows}, + }, + }) + + suite.Run(t, &MonitoringTextRunner{info: info, healthCheckTime: time.Minute * 5, healthCheckRefreshTime: time.Second * 5}) +} + +func (runner *MonitoringTextRunner) SetupSuite() { + fixture, err := define.NewFixtureFromLocalBuild(runner.T(), define.Version()) + require.NoError(runner.T(), err) + runner.agentFixture = fixture + + policyUUID := uuid.Must(uuid.NewV4()).String() + basePolicy := kibana.AgentPolicy{ + Name: "test-policy-" + policyUUID, + Namespace: "default", + Description: "Test policy " + policyUUID, + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + } + + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Force: true, + Privileged: true, + } + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) + defer cancel() + + // write a default config file that enables monitoring + err = runner.agentFixture.WriteFileToWorkDir(ctx, defaultTextCfg, "elastic-agent.yml") + require.NoError(runner.T(), err) + + policyResp, err := tools.InstallAgentWithPolicy(ctx, runner.T(), installOpts, runner.agentFixture, runner.info.KibanaClient, basePolicy) + require.NoError(runner.T(), err) + + runner.policyID = policyResp.ID + runner.policyName = basePolicy.Name + + _, err = tools.InstallPackageFromDefaultFile(ctx, runner.info.KibanaClient, "system", "1.53.1", 
"system_integration_setup.json", uuid.Must(uuid.NewV4()).String(), policyResp.ID) + require.NoError(runner.T(), err) +} + +func (runner *MonitoringTextRunner) TestMonitoringLiveness() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*10) + defer cancel() + + runner.AllComponentsHealthy(ctx) + + client := http.Client{Timeout: time.Second * 4} + endpoint := "http://localhost:6791/processes" + // first stage: ensure the HTTP config has remained up post-install + req, err := http.NewRequestWithContext(ctx, "GET", endpoint, nil) + require.NoError(runner.T(), err) + + initResp, err := client.Do(req) + require.NoError(runner.T(), err) + require.Equal(runner.T(), http.StatusOK, initResp.StatusCode) + + // use the fleet override API to change the port that we're running on. + override := map[string]interface{}{ + "name": runner.policyName, + "namespace": "default", + "overrides": map[string]interface{}{ + "agent": map[string]interface{}{ + "monitoring": map[string]interface{}{ + "http": map[string]interface{}{ + "enabled": true, + "host": "localhost", + "port": 6792, + }, + }, + }, + }, + } + + raw, err := json.Marshal(override) + require.NoError(runner.T(), err) + reader := bytes.NewBuffer(raw) + overrideEndpoint := fmt.Sprintf("/api/fleet/agent_policies/%s", runner.policyID) + statusCode, overrideResp, err := runner.info.KibanaClient.Request("PUT", overrideEndpoint, nil, nil, reader) + require.NoError(runner.T(), err) + require.Equal(runner.T(), http.StatusOK, statusCode, "non-200 status code; got response: %s", string(overrideResp)) + + runner.AllComponentsHealthy(ctx) + + updatedEndpoint := "http://localhost:6792/processes" + // second stage: ensure the HTTP config has updated + req, err = http.NewRequestWithContext(ctx, "GET", updatedEndpoint, nil) + require.NoError(runner.T(), err) + + initResp, err = client.Do(req) + require.NoError(runner.T(), err) + require.Equal(runner.T(), http.StatusOK, initResp.StatusCode) +} + +// AllComponentsHealthy ensures all the beats and agent are healthy and working before we continue +func (runner *MonitoringTextRunner) AllComponentsHealthy(ctx context.Context) { + compDebugName := "" + require.Eventually(runner.T(), func() bool { + allHealthy := true + status, err := runner.agentFixture.ExecStatus(ctx) + if err != nil { + runner.T().Logf("agent status returned an error: %v", err) + return false + } + + for _, comp := range status.Components { + runner.T().Logf("component state: %s", comp.Message) + if comp.State != int(cproto.State_HEALTHY) { + compDebugName = comp.Name + allHealthy = false + } + } + return allHealthy + }, runner.healthCheckTime, runner.healthCheckRefreshTime, "install never became healthy: components did not return a healthy state: %s", compDebugName) +} diff --git a/x-pack/agentbeat/testing/integration/monitoring_probe_reload_test.go b/x-pack/agentbeat/testing/integration/monitoring_probe_reload_test.go new file mode 100644 index 000000000000..134a0143ca4c --- /dev/null +++ b/x-pack/agentbeat/testing/integration/monitoring_probe_reload_test.go @@ -0,0 +1,183 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
+ +//go:build integration + +package integration + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "testing" + "time" + + "github.com/gofrs/uuid/v5" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/elastic/elastic-agent-libs/kibana" + "github.com/elastic/elastic-agent/pkg/control/v2/cproto" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools" +) + +type MonitoringRunner struct { + suite.Suite + info *define.Info + agentFixture *atesting.Fixture + + ESHost string + + healthCheckTime time.Duration + healthCheckRefreshTime time.Duration + + policyID string + policyName string +} + +func TestMonitoringLivenessReloadable(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: "fleet", + Stack: &define.Stack{}, + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + OS: []define.OS{ + {Type: define.Linux}, + {Type: define.Windows}, + }, + }) + + suite.Run(t, &MonitoringRunner{info: info, healthCheckTime: time.Minute * 5, healthCheckRefreshTime: time.Second * 5}) +} + +func (runner *MonitoringRunner) SetupSuite() { + fixture, err := define.NewFixtureFromLocalBuild(runner.T(), define.Version()) + require.NoError(runner.T(), err) + runner.agentFixture = fixture + + policyUUID := uuid.Must(uuid.NewV4()).String() + basePolicy := kibana.AgentPolicy{ + Name: "test-policy-" + policyUUID, + Namespace: "default", + Description: "Test policy " + policyUUID, + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + } + + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Force: true, + Privileged: true, + } + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) + defer cancel() + + policyResp, err := tools.InstallAgentWithPolicy(ctx, runner.T(), installOpts, runner.agentFixture, runner.info.KibanaClient, basePolicy) + require.NoError(runner.T(), err) + + runner.policyID = policyResp.ID + runner.policyName = basePolicy.Name + + _, err = tools.InstallPackageFromDefaultFile(ctx, runner.info.KibanaClient, "system", "1.53.1", "system_integration_setup.json", uuid.Must(uuid.NewV4()).String(), policyResp.ID) + require.NoError(runner.T(), err) +} + +func (runner *MonitoringRunner) TestMonitoringLiveness() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*10) + defer cancel() + + runner.AllComponentsHealthy(ctx) + + client := http.Client{Timeout: time.Second * 4} + endpoint := "http://localhost:6792/liveness" + // first stage: ensure the default behavior, http monitoring is off. This should return an error + req, err := http.NewRequestWithContext(ctx, "GET", endpoint, nil) + require.NoError(runner.T(), err) + + _, err = client.Do(req) + require.Error(runner.T(), err) + + // use the fleet override API to enable http monitoring. + // This tests both the http server itself, and tests that the agent reloader actually reloads the agent config. 
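+	// The override payload below mirrors the one used by the preserve-text-config
+	// test earlier in this patch: it enables the agent's monitoring HTTP server on
+	// localhost:6792 through the Fleet agent policy API.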
+	override := map[string]interface{}{
+		"name":      runner.policyName,
+		"namespace": "default",
+		"overrides": map[string]interface{}{
+			"agent": map[string]interface{}{
+				"monitoring": map[string]interface{}{
+					"http": map[string]interface{}{
+						"enabled": true,
+						"host":    "localhost",
+						"port":    6792,
+					},
+				},
+			},
+		},
+	}
+
+	raw, err := json.Marshal(override)
+	require.NoError(runner.T(), err)
+	reader := bytes.NewBuffer(raw)
+	overrideEndpoint := fmt.Sprintf("/api/fleet/agent_policies/%s", runner.policyID)
+	statusCode, overrideResp, err := runner.info.KibanaClient.Request("PUT", overrideEndpoint, nil, nil, reader)
+	require.NoError(runner.T(), err)
+	require.Equal(runner.T(), http.StatusOK, statusCode, "non-200 status code; got response: %s", string(overrideResp))
+
+	runner.AllComponentsHealthy(ctx)
+
+	// second check: the /liveness endpoint should now be responding
+	runner.CheckResponse(ctx, endpoint)
+
+	runner.CheckResponse(ctx, fmt.Sprintf("%s?failon=degraded", endpoint))
+
+	runner.CheckResponse(ctx, fmt.Sprintf("%s?failon=failed", endpoint))
+
+	runner.CheckResponse(ctx, fmt.Sprintf("%s?failon=heartbeat", endpoint))
+}
+
+// CheckResponse checks to see if the liveness probe returns a 200
+func (runner *MonitoringRunner) CheckResponse(ctx context.Context, endpoint string) {
+	req, err := http.NewRequestWithContext(ctx, "GET", endpoint, nil)
+	require.NoError(runner.T(), err)
+
+	client := http.Client{Timeout: time.Second * 4}
+
+	livenessResp, err := client.Do(req)
+	require.NoError(runner.T(), err)
+	defer livenessResp.Body.Close()
+	require.Equal(runner.T(), http.StatusOK, livenessResp.StatusCode) // this is effectively the check for the test
+}
+
+// AllComponentsHealthy ensures all the beats and agent are healthy and working before we continue
+func (runner *MonitoringRunner) AllComponentsHealthy(ctx context.Context) {
+	compDebugName := ""
+	require.Eventually(runner.T(), func() bool {
+		allHealthy := true
+		status, err := runner.agentFixture.ExecStatus(ctx)
+		if err != nil {
+			runner.T().Logf("agent status returned an error: %v", err)
+			return false
+		}
+
+		for _, comp := range status.Components {
+			runner.T().Logf("component state: %s", comp.Message)
+			if comp.State != int(cproto.State_HEALTHY) {
+				compDebugName = comp.Name
+				allHealthy = false
+			}
+		}
+		return allHealthy
+	}, runner.healthCheckTime, runner.healthCheckRefreshTime, "install never became healthy: components did not return a healthy state: %s", compDebugName)
+}
diff --git a/x-pack/agentbeat/testing/integration/otel_test.go b/x-pack/agentbeat/testing/integration/otel_test.go
new file mode 100644
index 000000000000..7e9ce375c848
--- /dev/null
+++ b/x-pack/agentbeat/testing/integration/otel_test.go
@@ -0,0 +1,617 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License 2.0;
+// you may not use this file except in compliance with the Elastic License 2.0.
+ +//go:build integration + +package integration + +import ( + "bytes" + "context" + "fmt" + "net/url" + "os" + "path/filepath" + "strings" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/elastic/elastic-agent/pkg/control/v2/client" + aTesting "github.com/elastic/elastic-agent/pkg/testing" + atesting "github.com/elastic/elastic-agent/pkg/testing" + integrationtest "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools/estools" + "github.com/elastic/elastic-agent/pkg/testing/tools/testcontext" + "github.com/elastic/go-elasticsearch/v8" +) + +const fileProcessingFilename = `/tmp/testfileprocessing.json` + +var fileProcessingConfig = []byte(`receivers: + filelog: + include: [ "/var/log/system.log", "/var/log/syslog" ] + start_at: beginning + +exporters: + file: + path: ` + fileProcessingFilename + ` +service: + pipelines: + logs: + receivers: [filelog] + exporters: + - file`) + +var fileInvalidOtelConfig = []byte(`receivers: + filelog: + include: [ "/var/log/system.log", "/var/log/syslog" ] + start_at: beginning + +exporters: + file: + path: ` + fileProcessingFilename + ` +service: + pipelines: + logs: + receivers: [filelog] + processors: [nonexistingprocessor] + exporters: + - file`) + +const apmProcessingContent = `2023-06-19 05:20:50 ERROR This is a test error message +2023-06-20 12:50:00 DEBUG This is a test debug message 2 +2023-06-20 12:51:00 DEBUG This is a test debug message 3 +2023-06-20 12:52:00 DEBUG This is a test debug message 4` + +const apmOtelConfig = `receivers: + filelog: + include: [ %s ] + operators: + - type: regex_parser + regex: '^(?P