Merge pull request kubernetes#26364 from jupblb/b232589040
Add optional presubmit with 5k nodes for perf-tests repo
k8s-ci-robot authored Jun 14, 2022
2 parents ed7ecaa + 86342b0 commit ad0ae57
Showing 7 changed files with 99 additions and 16 deletions.
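
The new clusterloader2 job is registered with always_run: false, so it never starts automatically; as with any optional Prow presubmit, it has to be requested explicitly on a kubernetes/perf-tests pull request, e.g. by commenting:

    /test pull-perf-tests-clusterloader2-e2e-gce-scale-performance-manual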
@@ -128,7 +128,7 @@ presubmits:
      - --extract=local
      - --flush-mem-after-build=true
      - --gcp-nodes=500
-     - --gcp-project=k8s-presubmit-scale
+     - --gcp-project-type=scalability-presubmit-5k-project
      - --gcp-zone=us-east1-b
      - --provider=gce
      - --stage=gs://kubernetes-release-pull/ci/pull-kubernetes-e2e-gce-big-performance
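
The same swap appears in the next hunk. kubetest's --gcp-project flag pins a job to one fixed GCP project, whereas --gcp-project-type asks Boskos to lease a free project of the named type for the duration of the run and release it afterwards; that is what lets these scale presubmits share the dedicated 5k pool this commit registers with Boskos below.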
@@ -262,7 +262,7 @@ presubmits:
      - --extract=local
      - --flush-mem-after-build=true
      - --gcp-nodes=5000
-     - --gcp-project=k8s-presubmit-scale
+     - --gcp-project-type=scalability-presubmit-5k-project
      - --gcp-zone=us-east1-b
      - --provider=gce
      - --stage=gs://kubernetes-release-pull/ci/pull-kubernetes-e2e-gce-scale-performance-manual
@@ -672,6 +672,95 @@ presubmits:
      securityContext:
        privileged: true

+# Fork of kubernetes/kubernetes: pull-kubernetes-e2e-gce-scale-performance-manual
+- name: pull-perf-tests-clusterloader2-e2e-gce-scale-performance-manual
+  always_run: false
+  max_concurrency: 1
+  branches:
+  - master
+  decorate: true
+  path_alias: k8s.io/perf-tests
+  decoration_config:
+    timeout: 450m
+  extra_refs:
+  - org: kubernetes
+    repo: release
+    base_ref: master
+    path_alias: k8s.io/release
+  labels:
+    preset-service-account: "true"
+    preset-k8s-ssh: "true"
+    preset-dind-enabled: "true"
+    preset-e2e-scalability-common: "true"
+    preset-e2e-scalability-presubmits: "true"
+  annotations:
+    testgrid-dashboards: presubmits-kubernetes-scalability
+    testgrid-tab-name: pull-perf-tests-clusterloader2-e2e-gce-scale-performance
+  spec:
+    containers:
+    - image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20220514-17efd5d2c3-master
+      command:
+      - runner.sh
+      - /workspace/scenarios/kubernetes_e2e.py
+      args:
+      - --cluster=
+      - --env=HEAPSTER_MACHINE_TYPE=e2-standard-32
+      # TODO(mborsz): Adjust or remove this change once we understand the coredns
+      # memory usage regression.
+      - --env=KUBE_DNS_MEMORY_LIMIT=300Mi
+      - --extract=ci/latest-fast
+      - --extract-ci-bucket=k8s-release-dev
+      - --gcp-nodes=5000
+      - --gcp-project-type=scalability-presubmit-5k-project
+      - --gcp-zone=us-east1-b
+      - --provider=gce
+      - --metadata-sources=cl2-metadata.json
+      - --env=CL2_LOAD_TEST_THROUGHPUT=50
+      - --env=CL2_DELETE_TEST_THROUGHPUT=50
+      # Overrides CONTROLLER_MANAGER_TEST_ARGS from preset-e2e-scalability-periodics.
+      - --env=CONTROLLER_MANAGER_TEST_ARGS=--profiling --kube-api-qps=100 --kube-api-burst=100 --endpointslice-updates-batch-period=500ms --endpoint-updates-batch-period=500ms
+      # Overrides SCHEDULER_TEST_ARGS from preset-e2e-scalability-periodics.
+      # TODO(#1311): Clean this up after the experiment - it should let us
+      # hugely decrease pod-startup-latency across the whole test.
+      # Given that individual controllers have separate QPS limits, we allow
+      # the scheduler to keep up with the load from the deployment, daemonset and
+      # job controllers performing pod creations at once.
+      - --env=SCHEDULER_TEST_ARGS=--profiling --kube-api-qps=300 --kube-api-burst=300
+      # With APF, only the sum of --max-requests-inflight and --max-mutating-requests-inflight matters, so set --max-mutating-requests-inflight to 0.
+      - --env=APISERVER_TEST_ARGS=--max-requests-inflight=640 --max-mutating-requests-inflight=0
+      - --env=CL2_ENABLE_API_AVAILABILITY_MEASUREMENT=true
+      - --env=CL2_API_AVAILABILITY_PERCENTAGE_THRESHOLD=99.5
+      - --test=false
+      - --test-cmd=$GOPATH/src/k8s.io/perf-tests/run-e2e.sh
+      - --test-cmd-args=cluster-loader2
+      - --test-cmd-args=--experimental-gcp-snapshot-prometheus-disk=true
+      - --test-cmd-args=--experimental-prometheus-disk-snapshot-name=$(JOB_NAME)-$(BUILD_ID)
+      - --test-cmd-args=--experimental-prometheus-snapshot-to-report-dir=true
+      - --test-cmd-args=--nodes=5000
+      - --test-cmd-args=--prometheus-scrape-node-exporter
+      - --test-cmd-args=--provider=gce
+      - --test-cmd-args=--report-dir=$(ARTIFACTS)
+      - --test-cmd-args=--testconfig=testing/load/config.yaml
+      - --test-cmd-args=--testconfig=testing/huge-service/config.yaml
+      - --test-cmd-args=--testconfig=testing/access-tokens/config.yaml
+      - --test-cmd-args=--testoverrides=./testing/experiments/enable_restart_count_check.yaml
+      - --test-cmd-args=--testoverrides=./testing/experiments/ignore_known_gce_container_restarts.yaml
+      - --test-cmd-args=--testoverrides=./testing/overrides/5000_nodes.yaml
+      - --test-cmd-name=ClusterLoaderV2
+      - --timeout=420m
+      - --use-logexporter
+      - --logexporter-gcs-path=gs://sig-scalability-logs/$(JOB_NAME)/$(BUILD_ID)
+      resources:
+        limits:
+          # Using 6 CPUs speeds up the bazel build phase (4 would be enough for the test itself).
+          cpu: 6
+          memory: "16Gi"
+        requests:
+          cpu: 6
+          memory: "16Gi"
+      securityContext:
+        privileged: true
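
For reference, because --test=false is set, scenarios/kubernetes_e2e.py skips the regular e2e suite and runs the configured --test-cmd with the accumulated --test-cmd-args instead; reconstructed from the args above (not copied from job output), the invocation is roughly:

    $GOPATH/src/k8s.io/perf-tests/run-e2e.sh cluster-loader2 \
      --experimental-gcp-snapshot-prometheus-disk=true \
      --experimental-prometheus-disk-snapshot-name=$(JOB_NAME)-$(BUILD_ID) \
      --experimental-prometheus-snapshot-to-report-dir=true \
      --nodes=5000 --prometheus-scrape-node-exporter --provider=gce \
      --report-dir=$(ARTIFACTS) \
      --testconfig=testing/load/config.yaml \
      --testconfig=testing/huge-service/config.yaml \
      --testconfig=testing/access-tokens/config.yaml \
      --testoverrides=./testing/experiments/enable_restart_count_check.yaml \
      --testoverrides=./testing/experiments/ignore_known_gce_container_restarts.yaml \
      --testoverrides=./testing/overrides/5000_nodes.yaml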

- name: pull-perf-tests-util-images
  always_run: false
  skip_report: false
2 changes: 1 addition & 1 deletion config/prow/cluster/build/boskos-janitor.yaml
@@ -22,7 +22,7 @@ spec:
        image: gcr.io/k8s-staging-boskos/janitor:v20220516-d007e44
        args:
        - --boskos-url=http://boskos.test-pods.svc.cluster.local.
-       - --resource-type=gce-project,gpu-project,ingress-project,istio-project,scalability-presubmit-project,scalability-project,node-e2e-project
+       - --resource-type=gce-project,gpu-project,ingress-project,istio-project,scalability-presubmit-project,scalability-presubmit-5k-project,scalability-project,node-e2e-project
        - --pool-size=20
        - --
        - --hours=0
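Both the janitor above and the reaper below now watch the new scalability-presubmit-5k-project type. The janitor's job is to repeatedly lease dirty resources of each listed type, clean them, and hand them back as free. A minimal Go sketch of that lease cycle against the Boskos client library (illustrative only — clean-project.sh is a hypothetical stand-in for the real GCP cleanup step, not the janitor's actual source):

package main

import (
	"fmt"
	"os/exec"

	"sigs.k8s.io/boskos/client"
	"sigs.k8s.io/boskos/common"
)

func main() {
	c, err := client.NewClient("janitor", "http://boskos.test-pods.svc.cluster.local.", "", "")
	if err != nil {
		panic(err)
	}
	for {
		// Lease the next dirty project of the new type, marking it "cleaning".
		res, err := c.Acquire("scalability-presubmit-5k-project", common.Dirty, common.Cleaning)
		if err != nil {
			break // no dirty resource available right now
		}
		// Hypothetical cleanup step standing in for the real janitor script.
		if err := exec.Command("clean-project.sh", res.Name).Run(); err != nil {
			fmt.Println("cleanup failed:", err)
			_ = c.ReleaseOne(res.Name, common.Dirty) // put it back for a retry
			continue
		}
		_ = c.ReleaseOne(res.Name, common.Free)
	}
}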
2 changes: 1 addition & 1 deletion config/prow/cluster/build/boskos-reaper_deployment.yaml
@@ -21,4 +21,4 @@ spec:
        image: gcr.io/k8s-staging-boskos/reaper:v20220516-d007e44
        args:
        - --boskos-url=http://boskos.test-pods.svc.cluster.local.
-       - --resource-type=gce-project,gpu-project,ingress-project,istio-project,scalability-presubmit-project,scalability-project,aws-account,node-e2e-project
+       - --resource-type=gce-project,gpu-project,ingress-project,istio-project,scalability-presubmit-project,scalability-presubmit-5k-project,scalability-project,aws-account,node-e2e-project
@@ -326,6 +326,10 @@ resources:
    - k8s-presubmit-scale-45
    state: dirty
    type: scalability-presubmit-project
+ - names:
+   - k8s-presubmit-scale
+   state: dirty
+   type: scalability-presubmit-5k-project
  - names:
    - k8s-jkns-gke-ubuntu
    - k8s-jkns-gke-ubuntu-1-6
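With this pool entry, k8s-presubmit-scale stops being a hard-coded project (it is dropped from the static lists in ci-janitor and kubernetes_janitor.py below) and becomes a Boskos-managed resource of the new type; it enters the pool dirty, so the janitor cleans it before handing out the first lease.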
1 change: 1 addition & 0 deletions config/prow/cluster/monitoring/mixins/lib/config.libsonnet
@@ -73,6 +73,7 @@ local config = {
{job: "k8s-prow-builds-new-boskos", type: "scalability-project", friendly: "Scalability project"},
{job: "k8s-infra-prow-builds-boskos", type: "scalability-project", friendly: "Scalability project (k8s-infra)"},
{job: "k8s-prow-builds-new-boskos", type: "scalability-presubmit-project", friendly: "Scalability presubmit project"}
{job: "k8s-prow-builds-new-boskos", type: "scalability-presubmit-5k-project", friendly: "Scalability presubmit project (5k)"}
],

// How long we go during work hours without seeing a webhook before alerting.
Expand Down
1 change: 0 additions & 1 deletion experiment/ci-janitor/main.go
@@ -47,7 +47,6 @@ var (
"k8s-jkns-pr-kubemark",
"k8s-jkns-pr-node-e2e",
"k8s-jkns-pr-gce-gpus",
"k8s-presubmit-scale",
// k8s-infra projects, can't be cleaned by k8s-prow serviceaccounts
"k8s-infra-e2e-scale-5k-project",
"k8s-infra-e2e-gpu-project",
12 changes: 1 addition & 11 deletions scenarios/kubernetes_janitor.py
@@ -99,12 +99,6 @@ def clean_project(project, hours=24, dryrun=False, ratelimit=None, filt=None):
    'k8s-jkns-pr-gce-gpus': 3,
}

-SCALE_PROJECT = {
-    # cleans up resources older than 12h
-    # for scale presubmit job we need to give jobs enough time to finish.
-    'k8s-presubmit-scale': 12,
-}
-
def check_predefine_jobs(jobs, ratelimit):
    """Handle predefined jobs"""
    for project, expire in jobs.iteritems():
@@ -131,8 +125,6 @@ def check_ci_jobs():
        if any(b in project for b in EXEMPT_PROJECTS):
            print >>sys.stderr, 'Project %r is exempted in ci-janitor' % project
            continue
-        if project in PR_PROJECTS or project in SCALE_PROJECT:
-            continue  # CI janitor skips all PR jobs
        found = project
    if found:
        clean_project(found, clean_hours)
@@ -142,8 +134,6 @@ def main(mode, ratelimit, projects, age, artifacts, filt):
"""Run janitor for each project."""
if mode == 'pr':
check_predefine_jobs(PR_PROJECTS, ratelimit)
elif mode == 'scale':
check_predefine_jobs(SCALE_PROJECT, ratelimit)
elif mode == 'custom':
projs = str.split(projects, ',')
for proj in projs:
@@ -183,7 +173,7 @@ def main(mode, ratelimit, projects, age, artifacts, filt):
    VERBOSE = False
    PARSER = argparse.ArgumentParser()
    PARSER.add_argument(
-        '--mode', default='ci', choices=['ci', 'pr', 'scale', 'custom'],
+        '--mode', default='ci', choices=['ci', 'pr', 'custom'],
        help='Which type of projects to clear')
    PARSER.add_argument(
        '--ratelimit', type=int,
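
With Boskos now responsible for cleaning the 5k project, the janitor's dedicated scale mode is dead code and is removed end to end: the SCALE_PROJECT table, the mode == 'scale' branch, and the CLI choice. Note that any caller still passing --mode=scale will now fail argparse validation with an 'invalid choice' error, so invocations have to be updated together with this change.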