diff --git a/README.md b/README.md
index 3d7cdeb1f9b..db5231425b1 100644
--- a/README.md
+++ b/README.md
@@ -18,8 +18,7 @@ Katib supports
Katib is the project which is agnostic to machine learning (ML) frameworks.
It can tune hyperparameters of applications written in any language of the
users’ choice and natively supports many ML frameworks, such as
-[TensorFlow](https://www.tensorflow.org/), [Apache MXNet](https://mxnet.apache.org/),
-[PyTorch](https://pytorch.org/), [XGBoost](https://xgboost.readthedocs.io/en/latest/), and others.
+[TensorFlow](https://www.tensorflow.org/), [PyTorch](https://pytorch.org/), [XGBoost](https://xgboost.readthedocs.io/en/latest/), and others.

Katib can perform training jobs using any Kubernetes
[Custom Resources](https://www.kubeflow.org/docs/components/katib/trial-template/)
diff --git a/docs/images-location.md b/docs/images-location.md
index c20b60a43e4..ed330ac7bfe 100644
--- a/docs/images-location.md
+++ b/docs/images-location.md
@@ -271,17 +271,6 @@ The following table shows images for training containers which are used in the
Dockerfile -
docker.io/bytepsimage/mxnet
- docker.io/kubeflowkatib/xgboost-lightgbm
diff --git a/examples/v1beta1/README.md b/examples/v1beta1/README.md
index f971ae29862..063d762da6f 100644
--- a/examples/v1beta1/README.md
+++ b/examples/v1beta1/README.md
@@ -122,8 +122,6 @@ Check the following examples for the various distributed operators:
- [PyTorchJob MNIST](./kubeflow-training-operator/pytorchjob-mnist.yaml)
-- [MXJob BytePS](./kubeflow-training-operator/mxjob-byteps.yaml)
-
- [XGBoostJob LightGBM](./kubeflow-training-operator/xgboostjob-lightgbm.yaml)
- [MPIJob Horovod](./kubeflow-training-operator/mpijob-horovod.yaml)
diff --git a/examples/v1beta1/kubeflow-pipelines/early-stopping.ipynb b/examples/v1beta1/kubeflow-pipelines/early-stopping.ipynb
index 422cc1ff90a..8866f621ad5 100644
--- a/examples/v1beta1/kubeflow-pipelines/early-stopping.ipynb
+++ b/examples/v1beta1/kubeflow-pipelines/early-stopping.ipynb
@@ -9,7 +9,7 @@
"In this notebook you will:\n",
"- Create Katib Experiment using random algorithm.\n",
"- Use median stopping rule as an early stopping algorithm.\n",
- "- Use Kubernetes Job with mxnet mnist training container as a Trial template.\n",
+ "- Use Kubernetes Job with pytorch mnist training container as a Trial template.\n",
"- Create Pipeline to get the optimal hyperparameters.\n",
"\n",
"Reference documentation:\n",
diff --git a/examples/v1beta1/kubeflow-training-operator/mxjob-byteps.yaml b/examples/v1beta1/kubeflow-training-operator/mxjob-byteps.yaml
deleted file mode 100644
index 1d2fa656a62..00000000000
--- a/examples/v1beta1/kubeflow-training-operator/mxjob-byteps.yaml
+++ /dev/null
@@ -1,86 +0,0 @@
----
-apiVersion: kubeflow.org/v1beta1
-kind: Experiment
-metadata:
- namespace: kubeflow
- name: mxjob-byteps
-spec:
- objective:
- type: maximize
- goal: 0.99
- objectiveMetricName: Train-accuracy
- algorithm:
- algorithmName: random
- parallelTrialCount: 1
- maxTrialCount: 4
- maxFailedTrialCount: 3
- parameters:
- - name: lr
- parameterType: double
- feasibleSpace:
- min: "0.1"
- max: "0.11"
- trialTemplate:
- primaryContainerName: mxnet
- # In this example we can collect metrics only from the Worker pods.
- primaryPodLabels:
- training.kubeflow.org/replica-type: worker
- trialParameters:
- - name: learningRate
- description: Learning rate for the training model
- reference: lr
- trialSpec:
- apiVersion: kubeflow.org/v1
- kind: MXJob
- spec:
- jobMode: MXTrain
- runPolicy:
- cleanPodPolicy: None
- mxReplicaSpecs:
- Scheduler:
- replicas: 1
- restartPolicy: Never
- template:
- spec:
- containers:
- - name: mxnet
- image: docker.io/bytepsimage/mxnet
- command: ["bpslaunch"]
- Server:
- replicas: 1
- restartPolicy: Never
- template:
- spec:
- containers:
- - name: mxnet
- image: docker.io/bytepsimage/mxnet
- command: ["bpslaunch"]
- Worker:
- replicas: 1
- restartPolicy: Never
- template:
- spec:
- containers:
- - name: mxnet
- image: docker.io/bytepsimage/mxnet
- command: ["bpslaunch"]
- args:
- [
- "python3",
- "/usr/local/byteps/example/mxnet/train_imagenet_byteps.py",
- "--benchmark",
- "1",
- "--lr=${trialParameters.learningRate}",
- "--num-examples=1000",
- "--num-epochs=4",
- ]
- volumeMounts:
- - mountPath: /dev/shm
- name: dshm
- resources:
- limits:
- nvidia.com/gpu: 1
- volumes:
- - name: dshm
- emptyDir:
- medium: Memory
diff --git a/hack/gen-python-sdk/post_gen.py b/hack/gen-python-sdk/post_gen.py
index 1803bb20430..f61f6c8d227 100644
--- a/hack/gen-python-sdk/post_gen.py
+++ b/hack/gen-python-sdk/post_gen.py
@@ -59,9 +59,6 @@ def _rewrite_helper(input_file, output_file, rewrite_rules):
lines.append(
"from kubeflow.katib.constants.constants import BASE_IMAGE_PYTORCH\n"
)
- lines.append(
- "from kubeflow.katib.constants.constants import BASE_IMAGE_MXNET\n"
- )
# Add Kubernetes models to proper deserialization of Katib models.
if output_file == "sdk/python/v1beta1/kubeflow/katib/models/__init__.py":
diff --git a/manifests/v1beta1/components/controller/rbac.yaml b/manifests/v1beta1/components/controller/rbac.yaml
index e93fbd031cf..9eec0229645 100644
--- a/manifests/v1beta1/components/controller/rbac.yaml
+++ b/manifests/v1beta1/components/controller/rbac.yaml
@@ -97,7 +97,6 @@ rules:
- pytorchjobs
- mpijobs
- xgboostjobs
- - mxjobs
verbs:
- "get"
- "list"
diff --git a/manifests/v1beta1/installs/katib-cert-manager/katib-config.yaml b/manifests/v1beta1/installs/katib-cert-manager/katib-config.yaml
index c7be5cd231f..11b97ce95bc 100644
--- a/manifests/v1beta1/installs/katib-cert-manager/katib-config.yaml
+++ b/manifests/v1beta1/installs/katib-cert-manager/katib-config.yaml
@@ -10,7 +10,6 @@ init:
- PyTorchJob.v1.kubeflow.org
- MPIJob.v1.kubeflow.org
- XGBoostJob.v1.kubeflow.org
- - MXJob.v1.kubeflow.org
runtime:
metricsCollectors:
- kind: StdOut
diff --git a/manifests/v1beta1/installs/katib-external-db/katib-config.yaml b/manifests/v1beta1/installs/katib-external-db/katib-config.yaml
index 2aaf0496b10..3f6af8ba506 100644
--- a/manifests/v1beta1/installs/katib-external-db/katib-config.yaml
+++ b/manifests/v1beta1/installs/katib-external-db/katib-config.yaml
@@ -12,7 +12,6 @@ init:
- PyTorchJob.v1.kubeflow.org
- MPIJob.v1.kubeflow.org
- XGBoostJob.v1.kubeflow.org
- - MXJob.v1.kubeflow.org
runtime:
metricsCollectors:
- kind: StdOut
diff --git a/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml b/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml
index 875859cd75b..e027fefc4dc 100644
--- a/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml
+++ b/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml
@@ -13,7 +13,6 @@ init:
- PyTorchJob.v1.kubeflow.org
- MPIJob.v1.kubeflow.org
- XGBoostJob.v1.kubeflow.org
- - MXJob.v1.kubeflow.org
runtime:
metricsCollectors:
- kind: StdOut
diff --git a/manifests/v1beta1/installs/katib-openshift/katib-config.yaml b/manifests/v1beta1/installs/katib-openshift/katib-config.yaml
index c7be5cd231f..11b97ce95bc 100644
--- a/manifests/v1beta1/installs/katib-openshift/katib-config.yaml
+++ b/manifests/v1beta1/installs/katib-openshift/katib-config.yaml
@@ -10,7 +10,6 @@ init:
- PyTorchJob.v1.kubeflow.org
- MPIJob.v1.kubeflow.org
- XGBoostJob.v1.kubeflow.org
- - MXJob.v1.kubeflow.org
runtime:
metricsCollectors:
- kind: StdOut
diff --git a/manifests/v1beta1/installs/katib-standalone-postgres/katib-config.yaml b/manifests/v1beta1/installs/katib-standalone-postgres/katib-config.yaml
index 2aaf0496b10..3f6af8ba506 100644
--- a/manifests/v1beta1/installs/katib-standalone-postgres/katib-config.yaml
+++ b/manifests/v1beta1/installs/katib-standalone-postgres/katib-config.yaml
@@ -12,7 +12,6 @@ init:
- PyTorchJob.v1.kubeflow.org
- MPIJob.v1.kubeflow.org
- XGBoostJob.v1.kubeflow.org
- - MXJob.v1.kubeflow.org
runtime:
metricsCollectors:
- kind: StdOut
diff --git a/manifests/v1beta1/installs/katib-standalone/katib-config.yaml b/manifests/v1beta1/installs/katib-standalone/katib-config.yaml
index 2aaf0496b10..3f6af8ba506 100644
--- a/manifests/v1beta1/installs/katib-standalone/katib-config.yaml
+++ b/manifests/v1beta1/installs/katib-standalone/katib-config.yaml
@@ -12,7 +12,6 @@ init:
- PyTorchJob.v1.kubeflow.org
- MPIJob.v1.kubeflow.org
- XGBoostJob.v1.kubeflow.org
- - MXJob.v1.kubeflow.org
runtime:
metricsCollectors:
- kind: StdOut
diff --git a/pkg/apis/controller/experiments/v1beta1/constants.go b/pkg/apis/controller/experiments/v1beta1/constants.go
index e595ce14ca4..d2135fba017 100644
--- a/pkg/apis/controller/experiments/v1beta1/constants.go
+++ b/pkg/apis/controller/experiments/v1beta1/constants.go
@@ -45,7 +45,6 @@ var (
"TFJob": true,
"PyTorchJob": true,
"XGBoostJob": true,
- "MXJob": true,
"MPIJob": true,
}
)
diff --git a/pkg/util/v1beta1/katibconfig/config_test.go b/pkg/util/v1beta1/katibconfig/config_test.go
index 67d24c10d71..9f7d41c2aed 100644
--- a/pkg/util/v1beta1/katibconfig/config_test.go
+++ b/pkg/util/v1beta1/katibconfig/config_test.go
@@ -401,7 +401,6 @@ init:
- PyTorchJob.v1.kubeflow.org
- MPIJob.v1.kubeflow.org
- XGBoostJob.v1.kubeflow.org
- - MXJob.v1.kubeflow.org
webhookPort: 18443
enableLeaderElection: true
leaderElectionID: xyz0123
@@ -456,7 +455,6 @@ runtime:
"PyTorchJob.v1.kubeflow.org",
"MPIJob.v1.kubeflow.org",
"XGBoostJob.v1.kubeflow.org",
- "MXJob.v1.kubeflow.org",
},
WebhookPort: &customizedWebhookPort,
EnableLeaderElection: true,
diff --git a/sdk/python/v1beta1/kubeflow/katib/__init__.py b/sdk/python/v1beta1/kubeflow/katib/__init__.py
index bafe7befea3..c6ca7dda3b9 100644
--- a/sdk/python/v1beta1/kubeflow/katib/__init__.py
+++ b/sdk/python/v1beta1/kubeflow/katib/__init__.py
@@ -81,4 +81,3 @@
from kubeflow.katib.constants.constants import BASE_IMAGE_TENSORFLOW
from kubeflow.katib.constants.constants import BASE_IMAGE_TENSORFLOW_GPU
from kubeflow.katib.constants.constants import BASE_IMAGE_PYTORCH
-from kubeflow.katib.constants.constants import BASE_IMAGE_MXNET
diff --git a/sdk/python/v1beta1/kubeflow/katib/constants/constants.py b/sdk/python/v1beta1/kubeflow/katib/constants/constants.py
index 1e0478f48f8..8cc8db35bb6 100644
--- a/sdk/python/v1beta1/kubeflow/katib/constants/constants.py
+++ b/sdk/python/v1beta1/kubeflow/katib/constants/constants.py
@@ -57,7 +57,6 @@
BASE_IMAGE_TENSORFLOW = "docker.io/tensorflow/tensorflow:2.13.0"
BASE_IMAGE_TENSORFLOW_GPU = "docker.io/tensorflow/tensorflow:2.13.0-gpu"
BASE_IMAGE_PYTORCH = "docker.io/pytorch/pytorch:2.2.1-cuda12.1-cudnn8-runtime"
-BASE_IMAGE_MXNET = "docker.io/mxnet/python:1.9.1_native_py3"
DEFAULT_DB_MANAGER_ADDRESS = "katib-db-manager.kubeflow:6789"