diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 81476c323..9fa3b793e 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -222,6 +222,11 @@ jobs: helm_version: "" experimental: false + - federation_member: hetzner-2i2c + chartpress_args: "" + helm_version: "" + experimental: false + # OVH deployment paused # - federation_member: ovh2 # helm_version: "" diff --git a/WISDOM.md b/WISDOM.md index e354ef17b..ab3853cfa 100644 --- a/WISDOM.md +++ b/WISDOM.md @@ -2,3 +2,4 @@ - When you are in an outage, focus only on fixing the outage - do not try to do anything else. - Prefer minor annoyances happening infrequently but at regular intervals, rather than major annoyances happening rarely but at unpredictable intervals. +- Sometimes, surviving is winning. diff --git a/config/hetzner-2i2c.yaml b/config/hetzner-2i2c.yaml new file mode 100644 index 000000000..379e9081a --- /dev/null +++ b/config/hetzner-2i2c.yaml @@ -0,0 +1,137 @@ +projectName: hetzner-2i2c + +registry: + enabled: true + config: + storage: + # Uncomment this and comment out the s3 config to use filesystem + # filesystem: + # rootdirectory: /var/lib/registry + s3: + regionendpoint: https://fsn1.your-objectstorage.com + bucket: mybinder-2i2c-registry-hetzner + region: does-not-matter + storage: + filesystem: + storageClassName: "local-path" + ingress: + hosts: + - registry.2i2c.mybinder.org + +cryptnono: + detectors: + monero: + enabled: false + +binderhub: + config: + BinderHub: + hub_url: https://hub.2i2c.mybinder.org + badge_base_url: https://mybinder.org + sticky_builds: true + image_prefix: registry.2i2c.mybinder.org/i- + # image_prefix: quay.io/mybinder-hetzner-2i2c/image- + # build_docker_host: /var/run/dind/docker.sock + # TODO: we should have CPU requests, too + # use this to limit the number of builds per node + # complicated: dind memory request + KubernetesBuildExecutor.memory_request * builds_per_node ~= node memory + KubernetesBuildExecutor: + 
memory_request: "2G" + docker_host: /var/run/dind/docker.sock + + LaunchQuota: + total_quota: 300 + + # DockerRegistry: + # token_url: "https://2lmrrh8f.gra7.container-registry.ovh.net/service/token?service=harbor-registry" + + replicas: 1 + + extraVolumes: + - name: secrets + secret: + secretName: events-archiver-secrets + extraVolumeMounts: + - name: secrets + mountPath: /secrets + readOnly: true + extraEnv: + GOOGLE_APPLICATION_CREDENTIALS: /secrets/service-account.json + + dind: {} + + ingress: + hosts: + - 2i2c.mybinder.org + + jupyterhub: + # proxy: + # chp: + # resources: + # requests: + # cpu: "1" + # limits: + # cpu: "1" + ingress: + hosts: + - hub.2i2c.mybinder.org + tls: + - secretName: kubelego-tls-hub + hosts: + - hub.2i2c.mybinder.org + + imageCleaner: + # Use 40GB as upper limit, size is given in bytes + imageGCThresholdHigh: 40e9 + imageGCThresholdLow: 30e9 + imageGCThresholdType: "absolute" + +grafana: + ingress: + hosts: + - grafana.2i2c.mybinder.org + tls: + - hosts: + - grafana.2i2c.mybinder.org + secretName: kubelego-tls-grafana + datasources: + datasources.yaml: + apiVersion: 1 + datasources: + - name: prometheus + orgId: 1 + type: prometheus + url: https://prometheus.2i2c.mybinder.org + access: direct + isDefault: true + editable: false + # persistence: + # storageClassName: csi-cinder-high-speed + +prometheus: + server: + persistentVolume: + size: 50Gi + retention: 30d + ingress: + hosts: + - prometheus.2i2c.mybinder.org + tls: + - hosts: + - prometheus.2i2c.mybinder.org + secretName: kubelego-tls-prometheus + +ingress-nginx: + controller: + replicas: 1 + scope: + enabled: true + service: + loadBalancerIP: 116.203.245.43 + +static: + ingress: + hosts: + - static.2i2c.mybinder.org + tls: + secretName: kubelego-tls-static diff --git a/config/prod.yaml b/config/prod.yaml index 807d25744..bdd219054 100644 --- a/config/prod.yaml +++ b/config/prod.yaml @@ -228,10 +228,16 @@ federationRedirect: weight: 0 health: https://gke.mybinder.org/health 
versions: https://gke.mybinder.org/versions - gesis: + hetzner-2i2c: prime: true - url: https://notebooks.gesis.org/binder + url: https://2i2c.mybinder.org weight: 60 + health: https://2i2c.mybinder.org/health + versions: https://2i2c.mybinder.org/versions + gesis: + prime: false + url: https://notebooks.gesis.org/binder + weight: 40 health: https://notebooks.gesis.org/binder/health versions: https://notebooks.gesis.org/binder/versions ovh2: diff --git a/deploy.py b/deploy.py index 889e68877..5bbf46bf4 100755 --- a/deploy.py +++ b/deploy.py @@ -30,6 +30,9 @@ "prod": "us-central1", } +# Projects using raw KUBECONFIG files +KUBECONFIG_CLUSTERS = {"ovh2", "hetzner-2i2c"} + # Mapping of config name to cluster name for AWS EKS deployments AWS_DEPLOYMENTS = {"curvenote": "binderhub"} @@ -100,17 +103,15 @@ def setup_auth_azure(cluster, dry_run=False): print(stdout) -def setup_auth_ovh(release, cluster, dry_run=False): +def setup_auth_kubeconfig(release, cluster, dry_run=False): """ - Set up authentication with 'ovh' K8S from the ovh-kubeconfig.yml + Setup authentication with a pure kubeconfig file """ - print(f"Setup the OVH authentication for namespace {release}") + print(f"Setup authentication for namespace {release} with kubeconfig") - ovh_kubeconfig = os.path.join(ABSOLUTE_HERE, "secrets", f"{release}-kubeconfig.yml") - os.environ["KUBECONFIG"] = ovh_kubeconfig - print(f"Current KUBECONFIG='{ovh_kubeconfig}'") - stdout = check_output(["kubectl", "config", "use-context", cluster], dry_run) - print(stdout) + kubeconfig = os.path.join(ABSOLUTE_HERE, "secrets", f"{release}-kubeconfig.yml") + os.environ["KUBECONFIG"] = kubeconfig + print(f"Current KUBECONFIG='{kubeconfig}'") def setup_auth_gcloud(release, cluster=None, dry_run=False): @@ -436,13 +437,7 @@ def main(): argparser.add_argument( "release", help="Release to deploy", - choices=[ - "staging", - "prod", - "ovh", - "ovh2", - "curvenote", - ], + choices=["staging", "prod", "ovh", "ovh2", "curvenote", "hetzner-2i2c"], 
) argparser.add_argument( "--name", @@ -511,8 +506,8 @@ def main(): # script is running on CI, proceed with auth and helm setup if args.stage in ("all", "auth"): - if cluster.startswith("ovh"): - setup_auth_ovh(args.release, cluster, args.dry_run) + if cluster in KUBECONFIG_CLUSTERS: + setup_auth_kubeconfig(args.release, cluster, args.dry_run) patch_coredns(args.dry_run, args.diff) elif cluster in AZURE_RGs: setup_auth_azure(cluster, args.dry_run) diff --git a/docs/source/deployment/index.rst b/docs/source/deployment/index.rst index 5ab6e4f25..a30909a1c 100644 --- a/docs/source/deployment/index.rst +++ b/docs/source/deployment/index.rst @@ -8,3 +8,4 @@ Deployment and Operation prereqs how what + k3s diff --git a/docs/source/deployment/k3s.md b/docs/source/deployment/k3s.md new file mode 100644 index 000000000..b0a5f3015 --- /dev/null +++ b/docs/source/deployment/k3s.md @@ -0,0 +1,85 @@ +# Deploy a new mybinder.org federation member on a bare VM with `k3s` + +[k3s](https://k3s.io/) is a popular kubernetes distribution that we can use +to build _single node_ kubernetes installations that satisfy the needs of the +mybinder project. By focusing on the simplest possible kubernetes installation, +we can get all the benefits of kubernetes (simplified deployment, cloud agnosticity, +unified tooling, etc) **except** autoscaling, and deploy **anywhere we can get a VM +with root access**. This is vastly simpler than managing an autoscaling kubernetes +cluster, and allows expansion of the mybinder federation in ways that would otherwise +be more difficult. + +## VM requirements + +The k3s project publishes [their requirements](https://docs.k3s.io/installation/requirements?), +but we have a slightly more opinionated list. + +1. We must have full `root` access. +2. Runs latest Ubuntu LTS (currently 24.04). Debian is acceptable. +3. Direct internet access, inbound (public IP) and outbound. +4. "As big as possible", as we will be using all the capacity of this one VM +5. 
Ability to grant same access to the VM to all the operators of the mybinder federation. + +## Installing `k3s` + +We can use the [quickstart](https://docs.k3s.io/quick-start) on the `k3s` website, with the added +config of _disabling traefik_ that comes built in. We deploy nginx as part of our deployment, so we +do not need traefik. + +1. Create a Kubelet Config file in `/etc/kubelet.yaml` so we can + tweak various kubelet options, including maximum number of pods on a single + node: + + ```yaml + apiVersion: kubelet.config.k8s.io/v1beta1 + kind: KubeletConfiguration + maxPods: 300 + ``` + + We will need to develop better intuition for how many pods per node, but given we offer about + 450M of RAM per user, and RAM is the limiting factor (not CPU), let's roughly start with the + following formula to determine this: + + maxPods = 1.75 \* amount of ram in GB + + This adds a good amount of margin. We can tweak this later. + +2. Install `k3s`! + + ```bash + curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="server --kubelet-arg=config=/etc/kubelet.yaml" sh -s - --disable=traefik + ``` + + This runs for a minute, but should set up latest `k3s` on that node! You can verify that by running + `kubectl get node` and `kubectl version`. + +## Extracting authentication information via a `KUBECONFIG` file + +Follow https://docs.k3s.io/cluster-access#accessing-the-cluster-from-outside-with-kubectl + +## Setup DNS entries + +There's only one IP to set DNS entries for - the public IP of the VM. No loadbalancers or similar here. + +mybinder.org's DNS is managed via Cloudflare. You should have access, or ask someone in the mybinder team who does! + +Add the following entries: + +- An `A` record for `X.mybinder.org` pointing towards the public IP. `X` should be an organizational identifier that identifies and thanks whoever is donating this. +- Another `A` record for `*.X.mybinder.org` to the same public IP + +Give this a few minutes because it may take a while to propagate.
+ +## Make a config copy for this new member + +TODO + +## Make a secret config for this new member + +TODO + +## Deploy binder! + +## Test and validate + +## Add to the redirector diff --git a/mybinder/templates/netpol.yaml b/mybinder/templates/netpol.yaml index b2ab2ab92..918350ea3 100644 --- a/mybinder/templates/netpol.yaml +++ b/mybinder/templates/netpol.yaml @@ -73,7 +73,7 @@ spec: to: - podSelector: matchLabels: - app: nginx-ingress - component: controller + app.kubernetes.io/component: controller + app.kubernetes.io/name: ingress-nginx {{- end }} diff --git a/mybinder/templates/registry/configmap.yaml b/mybinder/templates/registry/configmap.yaml new file mode 100644 index 000000000..e3fae2457 --- /dev/null +++ b/mybinder/templates/registry/configmap.yaml @@ -0,0 +1,13 @@ +{{- if .Values.registry.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: registry-config + labels: + app: registry + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} +data: + config.yml: | + {{ .Values.registry.config | toJson }} +{{- end }} diff --git a/mybinder/templates/registry/deployment.yaml b/mybinder/templates/registry/deployment.yaml new file mode 100644 index 000000000..ed1270149 --- /dev/null +++ b/mybinder/templates/registry/deployment.yaml @@ -0,0 +1,62 @@ +{{- if .Values.registry.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: registry + labels: + app: registry + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + component: registry +spec: + replicas: {{ .Values.registry.replicas }} + selector: + matchLabels: + app: registry + release: {{ .Release.Name }} + component: registry + template: + metadata: + annotations: + checksum/registry-config: {{ include (print $.Template.BasePath "/registry/configmap.yaml") . 
| sha256sum }} + labels: + app: registry + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + component: registry + spec: + automountServiceAccountToken: false + nodeSelector: {{ toJson .Values.registry.nodeSelector }} + volumes: + - name: registry-config + configMap: + name: registry-config + - name: registry-secret + secret: + secretName: registry-secret + - name: registry-storage + persistentVolumeClaim: + claimName: registry + containers: + - name: registry + image: registry:3.0.0-rc.2 + volumeMounts: + - name: registry-config + # This path is what registry documentation *says* we should put + # our config files in + mountPath: /etc/distribution/config.yml + subPath: config.yml + - name: registry-config + # This path is what registry *actually* seems to read lol + mountPath: /etc/docker/registry/config.yml + subPath: config.yml + - name: registry-storage + mountPath: /var/lib/registry + - name: registry-secret + mountPath: /etc/distribution/auth.htpasswd + subPath: auth.htpasswd + {{- with .Values.registry.resources }} + resources: + {{- . 
| toYaml | nindent 10 }} + {{- end }} +{{- end }} diff --git a/mybinder/templates/registry/ingress.yaml b/mybinder/templates/registry/ingress.yaml new file mode 100644 index 000000000..0a8746886 --- /dev/null +++ b/mybinder/templates/registry/ingress.yaml @@ -0,0 +1,35 @@ +{{- if .Values.registry.enabled }} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: registry + labels: + app: registry + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + annotations: + kubernetes.io/tls-acme: "true" + # things be big yo + nginx.ingress.kubernetes.io/proxy-body-size: 4096m +spec: + ingressClassName: nginx + rules: + {{- range $host := .Values.registry.ingress.hosts }} + - host: {{ $host }} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: registry + port: + number: 5000 + {{- end }} + tls: + - secretName: tls-registry + hosts: + {{- range $host := .Values.registry.ingress.hosts }} + - {{ $host }} + {{- end }} +{{- end }} diff --git a/mybinder/templates/registry/pvc.yaml b/mybinder/templates/registry/pvc.yaml new file mode 100644 index 000000000..4c82f3f6b --- /dev/null +++ b/mybinder/templates/registry/pvc.yaml @@ -0,0 +1,17 @@ +{{- if .Values.registry.enabled }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: registry + labels: + app: registry + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} +spec: + storageClassName: {{ .Values.registry.storage.filesystem.storageClassName }} + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.registry.storage.filesystem.size }} +{{- end }} diff --git a/mybinder/templates/registry/secret.yaml b/mybinder/templates/registry/secret.yaml new file mode 100644 index 000000000..b05b8c7c1 --- /dev/null +++ b/mybinder/templates/registry/secret.yaml @@ -0,0 +1,13 @@ +{{- if .Values.registry.enabled }} +apiVersion: v1 +kind: Secret +metadata: + name: registry-secret + labels: + app: registry + heritage: {{ .Release.Service }} + 
release: {{ .Release.Name }} +type: Opaque +data: + auth.htpasswd: {{ htpasswd .Values.registry.auth.username .Values.registry.auth.password | b64enc }} +{{- end }} diff --git a/mybinder/templates/registry/service.yaml b/mybinder/templates/registry/service.yaml new file mode 100644 index 000000000..1819bf8f8 --- /dev/null +++ b/mybinder/templates/registry/service.yaml @@ -0,0 +1,20 @@ +{{- if .Values.registry.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: registry + labels: + app: registry + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} +spec: + type: {{ .Values.registry.service.type }} + selector: + app: registry + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + ports: + - name: registry + protocol: TCP + port: 5000 +{{- end }} diff --git a/mybinder/values.yaml b/mybinder/values.yaml index b5e14e1d7..1a8147183 100644 --- a/mybinder/values.yaml +++ b/mybinder/values.yaml @@ -10,6 +10,32 @@ cryptnono: containerdHostPath: /run/containerd/containerd.sock dockerHostPath: /run/dind/docker.sock +registry: + enabled: false + replicas: 2 + # Passed through to docker distribution / registry config + # https://distribution.github.io/distribution/about/configuration/ + config: + version: 0.1 + auth: + htpasswd: + realm: basic-realm + path: /etc/distribution/auth.htpasswd + + http: + addr: :5000 + log: + level: debug + accesslog: + disabled: false + service: + type: ClusterIP + storage: + filesystem: + storageClassName: "" + # Size is currently ignored- using local path provisioner + size: 10Gi + imagePullSecrets: tags: {} @@ -152,7 +178,7 @@ binderhub: 🤍 Donate to mybinder.org!