apiVersion: kubeflow.org/v1beta1 kind: Experiment metadata: finalizers: - update-prometheus-metrics generation: 1 name: katib-kfp-test-metrics-op-test namespace: common spec: algorithm: algorithmName: random maxFailedTrialCount: 1 maxTrialCount: 5 metricsCollectorSpec: collector: kind: StdOut objective: additionalMetricNames: - accuracy goal: 0.99 metricStrategies: - name: val-accuracy value: max - name: accuracy value: max objectiveMetricName: val-accuracy type: maximize parallelTrialCount: 2 parameters: - feasibleSpace: max: "0.03" min: "0.01" name: lr parameterType: double resumePolicy: LongRunning trialTemplate: failureCondition: status.[@this].#(phase=="Failed")# primaryContainerName: main primaryPodLabels: katib.kubeflow.org/model-training: "true" retain: true successCondition: status.[@this].#(phase=="Succeeded")# trialParameters: - description: Learning rate for the training model name: learningRate reference: lr trialSpec: apiVersion: argoproj.io/v1alpha1 kind: Workflow metadata: annotations: pipelines.kubeflow.org/kfp_sdk_version: 1.8.11 workflows.argoproj.io/pod-name-format: v1 generation: 8 spec: arguments: parameters: - name: lr value: ${trialParameters.learningRate} - name: optimizer value: Adam - name: loss value: categorical_crossentropy - name: epochs value: "1" - name: batch_size value: "32" entrypoint: test-pipeline podMetadata: labels: pipeline/runid: 163ced0c-cdc1-4ffc-a885-e27d936791bd serviceAccountName: default-editor templates: - container: args: - --trainset-flag - "True" - --data - /tmp/outputs/data/data command: - sh - -c - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'tensorflow==2.7.1' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'tensorflow==2.7.1' --user) && "$0" "$@" - sh - -ec - | program_path=$(mktemp) printf "%s" "$0" > "$program_path" python3 -u "$program_path" "$@" - | def _make_parent_dirs_and_return_path(file_path: str): import os os.makedirs(os.path.dirname(file_path), exist_ok=True) return file_path def load_data(data_path, trainset_flag = True, ): """ read the train and test data """ import pickle import tensorflow as tf # action for train set if trainset_flag: # load dataset (x_train, y_train), (_, _) = tf.keras.datasets.cifar10.load_data() #one-hot encode the categories y_train = tf.keras.utils.to_categorical(y_train) with open(data_path, 'wb') as output_file: pickle.dump((x_train,y_train), output_file) else: # load dataset (_, _), (x_test, y_test) = tf.keras.datasets.cifar10.load_data() #one-hot encode the categories y_test = tf.keras.utils.to_categorical(y_test) with open(data_path, 'wb') as output_file: pickle.dump((x_test,y_test), output_file) def _deserialize_bool(s) -> bool: from distutils.util import strtobool return strtobool(s) == 1 import argparse _parser = argparse.ArgumentParser(prog='Load data', description='read the train and test data') _parser.add_argument("--trainset-flag", dest="trainset_flag", type=_deserialize_bool, required=False, default=argparse.SUPPRESS) _parser.add_argument("--data", dest="data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) _parsed_args = vars(_parser.parse_args()) _outputs = load_data(**_parsed_args) image: python:3.7 name: "" resources: {} inputs: {} metadata: annotations: pipelines.kubeflow.org/arguments.parameters: '{"trainset_flag": "True"}' pipelines.kubeflow.org/component_ref: '{}' pipelines.kubeflow.org/component_spec: '{"description": "read the train and test data", "implementation": {"container": {"args": [{"if": {"cond": {"isPresent": "trainset_flag"}, "then": ["--trainset-flag", {"inputValue": "trainset_flag"}]}}, "--data", {"outputPath": "data"}], "command": ["sh", "-c", "(PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location ''tensorflow==2.7.1'' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location ''tensorflow==2.7.1'' --user) && \"$0\" \"$@\"", "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return file_path\n\ndef load_data(data_path, trainset_flag = True,\n ):\n \"\"\"\n read the train and test data\n \"\"\"\n import pickle\n import tensorflow as tf\n\n # action for train set\n if trainset_flag:\n # load dataset\n (x_train, y_train), (_, _) = tf.keras.datasets.cifar10.load_data()\n\n #one-hot encode the categories\n y_train = tf.keras.utils.to_categorical(y_train)\n\n with open(data_path, ''wb'') as output_file:\n pickle.dump((x_train,y_train), output_file)\n\n else:\n # load dataset\n (_, _), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()\n\n #one-hot encode the categories\n y_test = tf.keras.utils.to_categorical(y_test)\n\n with open(data_path, ''wb'') as output_file:\n pickle.dump((x_test,y_test), output_file)\n\ndef _deserialize_bool(s) -> bool:\n from distutils.util import strtobool\n return strtobool(s) == 1\n\nimport argparse\n_parser = argparse.ArgumentParser(prog=''Load data'', description=''read the train and test data'')\n_parser.add_argument(\"--trainset-flag\", dest=\"trainset_flag\", type=_deserialize_bool, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--data\", dest=\"data_path\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\n_outputs = load_data(**_parsed_args)\n"], "image": "python:3.7"}}, "inputs": [{"default": "True", "name": "trainset_flag", "optional": true, "type": "Boolean"}], "name": "Load data", "outputs": [{"name": "data", "type": "String"}]}' sidecar.istio.io/inject: "false" labels: pipelines.kubeflow.org/cache_enabled: "true" pipelines.kubeflow.org/enable_caching: "true" pipelines.kubeflow.org/kfp_sdk_version: 1.8.11 pipelines.kubeflow.org/pipeline-sdk-type: kfp name: load-data outputs: artifacts: - name: load-data-data path: /tmp/outputs/data/data - container: command: - sleep 60 && cat /tmp/inputs/data/data image: busybox name: "" resources: {} inputs: artifacts: - name: train-metrics_log path: /tmp/inputs/data/data metadata: annotations: pipelines.kubeflow.org/component_ref: '{"digest": "2f525828198d8cf22d54ffc55fee6ec07c54c2c77686dcd513be3679e2a40d97"}' pipelines.kubeflow.org/component_spec: '{"description": "Print file to stdout.", "implementation": {"container": {"args": [{"inputPath": "data"}], "command": ["sh", "-c", "cat $0\n"], "image": "busybox"}}, "inputs": [{"name": "data", "type": "String"}], "name": "Print file"}' pipelines.kubeflow.org/max_cache_staleness: P0D} labels: katib.kubeflow.org/model-training: "true" pipelines.kubeflow.org/cache_enabled: "false" pipelines.kubeflow.org/enable_caching: "false" pipelines.kubeflow.org/kfp_sdk_version: 1.8.11 pipelines.kubeflow.org/pipeline-sdk-type: kfp name: print-file outputs: {} - container: args: - --data-raw - /tmp/inputs/data_raw/data - --val-pct - "0.2" - --trainset-flag - "True" - --data-processed - /tmp/outputs/data_processed/data command: - sh - -c - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'scikit-learn' --user) && "$0" "$@" - sh - -ec - | program_path=$(mktemp) printf "%s" "$0" > "$program_path" python3 -u "$program_path" "$@" - | def _make_parent_dirs_and_return_path(file_path: str): import os os.makedirs(os.path.dirname(file_path), exist_ok=True) return file_path def process(data_raw_path, data_processed_path, val_pct = 0.2 , trainset_flag = True): """ Here we do all the preprocessing if the data path is for training data we: (1) Normalize the data (2) split the train and val data If it is for unseen test data, we: (1) Normalize the data This function returns in any case the processed data path """ # sklearn from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score, f1_score import pickle with open(data_raw_path, 'rb') as f: x, y = pickle.load(f) if trainset_flag: x_ = x/255 x_train, x_val, y_train, y_val = train_test_split(x_, y, test_size = val_pct, stratify=y, random_state = 42) with open(data_processed_path, 'wb') as output_file: pickle.dump((x_train,y_train, x_val,y_val), output_file) else: x_ = x/255 with open(data_processed_path, 'wb') as output_file: pickle.dump((x_,y), output_file) def _deserialize_bool(s) -> bool: from distutils.util import strtobool return strtobool(s) == 1 import argparse _parser = argparse.ArgumentParser(prog='Process', description='Here we do all the preprocessing') _parser.add_argument("--data-raw", dest="data_raw_path", type=str, required=True, default=argparse.SUPPRESS) _parser.add_argument("--val-pct", dest="val_pct", type=float, required=False, default=argparse.SUPPRESS) _parser.add_argument("--trainset-flag", dest="trainset_flag", type=_deserialize_bool, required=False, default=argparse.SUPPRESS) _parser.add_argument("--data-processed", dest="data_processed_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) _parsed_args = vars(_parser.parse_args()) _outputs = process(**_parsed_args) image: tensorflow/tensorflow:2.7.1 name: "" resources: {} inputs: artifacts: - name: load-data-data path: /tmp/inputs/data_raw/data metadata: annotations: pipelines.kubeflow.org/arguments.parameters: '{"trainset_flag": "True", "val_pct": "0.2"}' pipelines.kubeflow.org/component_ref: '{}' pipelines.kubeflow.org/component_spec: '{"description": "Here we do all the preprocessing", "implementation": {"container": {"args": ["--data-raw", {"inputPath": "data_raw"}, {"if": {"cond": {"isPresent": "val_pct"}, "then": ["--val-pct", {"inputValue": "val_pct"}]}}, {"if": {"cond": {"isPresent": "trainset_flag"}, "then": ["--trainset-flag", {"inputValue": "trainset_flag"}]}}, "--data-processed", {"outputPath": "data_processed"}], "command": ["sh", "-c", "(PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location ''scikit-learn'' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location ''scikit-learn'' --user) && \"$0\" \"$@\"", "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return file_path\n\ndef process(data_raw_path, data_processed_path, val_pct = 0.2 , trainset_flag = True):\n \"\"\"\n Here we do all the preprocessing\n if the data path is for training data we:\n (1) Normalize the data\n (2) split the train and val data\n If it is for unseen test data, we:\n (1) Normalize the data\n This function returns in any case the processed data path\n \"\"\"\n # sklearn\n from sklearn.model_selection import train_test_split\n from sklearn.metrics import accuracy_score, f1_score\n import pickle\n\n with open(data_raw_path, ''rb'') as f:\n x, y = pickle.load(f)\n if trainset_flag:\n\n x_ = x/255\n x_train, x_val, y_train, y_val = train_test_split(x_, y, test_size = val_pct, stratify=y, random_state = 42)\n\n with open(data_processed_path, ''wb'') as output_file:\n pickle.dump((x_train,y_train, x_val,y_val), output_file)\n\n else:\n x_ = x/255\n with open(data_processed_path, ''wb'') as output_file:\n pickle.dump((x_,y), output_file)\n\ndef _deserialize_bool(s) -> bool:\n from distutils.util import strtobool\n return strtobool(s) == 1\n\nimport argparse\n_parser = argparse.ArgumentParser(prog=''Process'', description=''Here we do all the preprocessing'')\n_parser.add_argument(\"--data-raw\", dest=\"data_raw_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--val-pct\", dest=\"val_pct\", type=float, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--trainset-flag\", dest=\"trainset_flag\", type=_deserialize_bool, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--data-processed\", dest=\"data_processed_path\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\n_outputs = process(**_parsed_args)\n"], "image": "tensorflow/tensorflow:2.7.1"}}, "inputs": [{"name": "data_raw", "type": "String"}, {"default": "0.2", "name": "val_pct", "optional": true, "type": "Float"}, {"default": "True", "name": "trainset_flag", "optional": true, "type": "Boolean"}], "name": "Process", "outputs": [{"name": "data_processed", "type": "String"}]}' sidecar.istio.io/inject: "false" labels: pipelines.kubeflow.org/cache_enabled: "true" pipelines.kubeflow.org/enable_caching: "true" pipelines.kubeflow.org/kfp_sdk_version: 1.8.11 pipelines.kubeflow.org/pipeline-sdk-type: kfp name: process outputs: artifacts: - name: process-data_processed path: /tmp/outputs/data_processed/data - dag: tasks: - arguments: {} name: load-data template: load-data - arguments: artifacts: - from: '{{tasks.train.outputs.artifacts.train-metrics_log}}' name: train-metrics_log dependencies: - train name: print-file template: print-file - arguments: artifacts: - from: '{{tasks.load-data.outputs.artifacts.load-data-data}}' name: load-data-data dependencies: - load-data name: process template: process - arguments: artifacts: - from: '{{tasks.process.outputs.artifacts.process-data_processed}}' name: process-data_processed parameters: - name: batch_size value: '{{inputs.parameters.batch_size}}' - name: epochs value: '{{inputs.parameters.epochs}}' - name: lr value: '{{inputs.parameters.lr}}' - name: optimizer value: '{{inputs.parameters.optimizer}}' dependencies: - process name: train template: train inputs: parameters: - name: batch_size - name: epochs - name: lr - name: optimizer metadata: annotations: sidecar.istio.io/inject: "false" labels: pipelines.kubeflow.org/cache_enabled: "true" name: test-pipeline outputs: {} - container: args: - --data-train - /tmp/inputs/data_train/data - --lr - '{{inputs.parameters.lr}}' - --optimizer - '{{inputs.parameters.optimizer}}' - --loss - categorical_crossentropy - --epochs - '{{inputs.parameters.epochs}}' - --batch-size - '{{inputs.parameters.batch_size}}' - --model-out - /tmp/outputs/model_out/data - --mlpipeline-metrics - /tmp/outputs/mlpipeline_metrics/data - --metrics-log - /tmp/outputs/metrics_log/data command: - sh - -ec - | program_path=$(mktemp) printf "%s" "$0" > "$program_path" python3 -u "$program_path" "$@" - | def _make_parent_dirs_and_return_path(file_path: str): import os os.makedirs(os.path.dirname(file_path), exist_ok=True) return file_path def train(data_train_path, model_out_path, mlpipeline_metrics_path, metrics_log_path, lr=1e-4, optimizer="Adam", loss="categorical_crossentropy", epochs=1, batch_size=32): """ This is the simulated train part of our ML pipeline where training is performed """ import tensorflow as tf import pickle from tensorflow.keras.preprocessing.image import ImageDataGenerator from pathlib import Path import json with open(data_train_path, 'rb') as f: x_train,y_train, x_val,y_val = pickle.load(f) model = tf.keras.Sequential([ tf.keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(32, 32, 3)), tf.keras.layers.MaxPooling2D(2, 2), tf.keras.layers.Conv2D(64, (3, 3), activation='relu'), tf.keras.layers.MaxPooling2D(2, 2), tf.keras.layers.Flatten(), tf.keras.layers.Dense(128, activation="relu"), tf.keras.layers.Dense(10, activation="softmax") ]) if optimizer.lower() == "sgd": optimizer = tf.keras.optimizers.SGD(lr) else: optimizer = tf.keras.optimizers.Adam(lr) model.compile(loss=loss, optimizer= optimizer, metrics=['accuracy']) # fit the model model_early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10, verbose=1, restore_best_weights=False) train_datagen = ImageDataGenerator( horizontal_flip=True ) validation_datagen = ImageDataGenerator() history = model.fit(train_datagen.flow(x_train, y_train, batch_size=batch_size), epochs=epochs, validation_data=validation_datagen.flow(x_val, y_val, batch_size=batch_size), shuffle=False, callbacks=[model_early_stopping_callback]) #model_out.metadata.update(**history.history) model.save(model_out_path, save_format="tf" ) # Log accuracz metrics = { 'metrics': [{ 'name': 'accuracy', # The name of the metric. Visualized as the column name in the runs table. 'numberValue': history.history['accuracy'][0], # The value of the metric. Must be a numeric value. 'format': "PERCENTAGE", # The optional format of the metric. Supported values are "RAW" (displayed in raw format) and "PERCENTAGE" (displayed in percentage format). }, { 'name': 'val-accuracy', # The name of the metric. Visualized as the column name in the runs table. 'numberValue': history.history['val_accuracy'][0], # The value of the metric. Must be a numeric value. 'format': "PERCENTAGE", # The optional format of the metric. Supported values are "RAW" (displayed in raw format) and "PERCENTAGE" (displayed in percentage format). }] } #fn_log = Path('/var/log/katib/metrics.log') #fn_log.parent.mkdir(parents=False, exist_ok=False) with open(metrics_log_path, 'w') as f: for m in metrics['metrics']: print(f"{m['name']}={m['numberValue']}") f.write(f"{m['name']}={m['numberValue']}\n") with open(mlpipeline_metrics_path, 'w') as f: json.dump(metrics, f) import argparse _parser = argparse.ArgumentParser(prog='Train', description='This is the simulated train part of our ML pipeline where training is performed') _parser.add_argument("--data-train", dest="data_train_path", type=str, required=True, default=argparse.SUPPRESS) _parser.add_argument("--lr", dest="lr", type=float, required=False, default=argparse.SUPPRESS) _parser.add_argument("--optimizer", dest="optimizer", type=str, required=False, default=argparse.SUPPRESS) _parser.add_argument("--loss", dest="loss", type=str, required=False, default=argparse.SUPPRESS) _parser.add_argument("--epochs", dest="epochs", type=int, required=False, default=argparse.SUPPRESS) _parser.add_argument("--batch-size", dest="batch_size", type=int, required=False, default=argparse.SUPPRESS) _parser.add_argument("--model-out", dest="model_out_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) _parser.add_argument("--mlpipeline-metrics", dest="mlpipeline_metrics_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) _parser.add_argument("--metrics-log", dest="metrics_log_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) _parsed_args = vars(_parser.parse_args()) _outputs = train(**_parsed_args) image: jupyter/tensorflow-notebook:tensorflow-2.9.1 name: "" resources: {} inputs: artifacts: - name: process-data_processed path: /tmp/inputs/data_train/data parameters: - name: batch_size - name: epochs - name: lr - name: optimizer metadata: annotations: pipelines.kubeflow.org/arguments.parameters: '{"batch_size": "{{inputs.parameters.batch_size}}", "epochs": "{{inputs.parameters.epochs}}", "loss": "categorical_crossentropy", "lr": "{{inputs.parameters.lr}}", "optimizer": "{{inputs.parameters.optimizer}}"}' pipelines.kubeflow.org/component_ref: '{}' pipelines.kubeflow.org/component_spec: '{"description": "This is the simulated train part of our ML pipeline where training is performed", "implementation": {"container": {"args": ["--data-train", {"inputPath": "data_train"}, {"if": {"cond": {"isPresent": "lr"}, "then": ["--lr", {"inputValue": "lr"}]}}, {"if": {"cond": {"isPresent": "optimizer"}, "then": ["--optimizer", {"inputValue": "optimizer"}]}}, {"if": {"cond": {"isPresent": "loss"}, "then": ["--loss", {"inputValue": "loss"}]}}, {"if": {"cond": {"isPresent": "epochs"}, "then": ["--epochs", {"inputValue": "epochs"}]}}, {"if": {"cond": {"isPresent": "batch_size"}, "then": ["--batch-size", {"inputValue": "batch_size"}]}}, "--model-out", {"outputPath": "model_out"}, "--mlpipeline-metrics", {"outputPath": "mlpipeline_metrics"}, "--metrics-log", {"outputPath": "metrics_log"}], "command": ["sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return file_path\n\ndef train(data_train_path, model_out_path, mlpipeline_metrics_path,\n metrics_log_path, lr=1e-4,\n optimizer=\"Adam\", loss=\"categorical_crossentropy\", epochs=1,\n batch_size=32):\n \"\"\"\n This is the simulated train part of our ML pipeline where training is performed\n \"\"\"\n\n import tensorflow as tf\n import pickle\n from tensorflow.keras.preprocessing.image import ImageDataGenerator\n from pathlib import Path\n import json\n\n with open(data_train_path, ''rb'') as f:\n x_train,y_train, x_val,y_val = pickle.load(f)\n\n model = tf.keras.Sequential([\n tf.keras.layers.Conv2D(64, (3, 3), activation=''relu'', input_shape=(32, 32, 3)),\n tf.keras.layers.MaxPooling2D(2, 2),\n tf.keras.layers.Conv2D(64, (3, 3), activation=''relu''),\n tf.keras.layers.MaxPooling2D(2, 2),\n tf.keras.layers.Flatten(),\n tf.keras.layers.Dense(128, activation=\"relu\"),\n tf.keras.layers.Dense(10, activation=\"softmax\")\n ])\n\n if optimizer.lower() == \"sgd\":\n optimizer = tf.keras.optimizers.SGD(lr)\n else:\n optimizer = tf.keras.optimizers.Adam(lr)\n\n model.compile(loss=loss,\n optimizer= optimizer,\n metrics=[''accuracy''])\n\n # fit the model\n model_early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor=''val_accuracy'', patience=10, verbose=1, restore_best_weights=False)\n\n train_datagen = ImageDataGenerator(\n horizontal_flip=True\n )\n\n validation_datagen = ImageDataGenerator()\n history = model.fit(train_datagen.flow(x_train, y_train, batch_size=batch_size),\n epochs=epochs,\n validation_data=validation_datagen.flow(x_val, y_val, batch_size=batch_size),\n shuffle=False,\n callbacks=[model_early_stopping_callback])\n #model_out.metadata.update(**history.history)\n model.save(model_out_path, save_format=\"tf\" )\n # Log accuracz\n\n metrics = {\n ''metrics'': [{\n ''name'': ''accuracy'', # The name of the metric. Visualized as the column name in the runs table.\n ''numberValue'': history.history[''accuracy''][0], # The value of the metric. Must be a numeric value.\n ''format'': \"PERCENTAGE\", # The optional format of the metric. Supported values are \"RAW\" (displayed in raw format) and \"PERCENTAGE\" (displayed in percentage format).\n },\n {\n ''name'': ''val-accuracy'', # The name of the metric. Visualized as the column name in the runs table.\n ''numberValue'': history.history[''val_accuracy''][0], # The value of the metric. Must be a numeric value.\n ''format'': \"PERCENTAGE\", # The optional format of the metric. Supported values are \"RAW\" (displayed in raw format) and \"PERCENTAGE\" (displayed in percentage format).\n }]\n }\n #fn_log = Path(''/var/log/katib/metrics.log'')\n #fn_log.parent.mkdir(parents=False, exist_ok=False)\n with open(metrics_log_path, ''w'') as f:\n for m in metrics[''metrics'']:\n print(f\"{m[''name'']}={m[''numberValue'']}\")\n f.write(f\"{m[''name'']}={m[''numberValue'']}\\n\")\n\n with open(mlpipeline_metrics_path, ''w'') as f:\n json.dump(metrics, f)\n\nimport argparse\n_parser = argparse.ArgumentParser(prog=''Train'', description=''This is the simulated train part of our ML pipeline where training is performed'')\n_parser.add_argument(\"--data-train\", dest=\"data_train_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--lr\", dest=\"lr\", type=float, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--optimizer\", dest=\"optimizer\", type=str, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--loss\", dest=\"loss\", type=str, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--epochs\", dest=\"epochs\", type=int, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--batch-size\", dest=\"batch_size\", type=int, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--model-out\", dest=\"model_out_path\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--mlpipeline-metrics\", dest=\"mlpipeline_metrics_path\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--metrics-log\", dest=\"metrics_log_path\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\n_outputs = train(**_parsed_args)\n"], "image": "jupyter/tensorflow-notebook:tensorflow-2.9.1"}}, "inputs": [{"name": "data_train", "type": "String"}, {"default": "0.0001", "name": "lr", "optional": true, "type": "Float"}, {"default": "Adam", "name": "optimizer", "optional": true, "type": "String"}, {"default": "categorical_crossentropy", "name": "loss", "optional": true, "type": "String"}, {"default": "1", "name": "epochs", "optional": true, "type": "Integer"}, {"default": "32", "name": "batch_size", "optional": true, "type": "Integer"}], "name": "Train", "outputs": [{"name": "model_out", "type": "String"}, {"name": "mlpipeline_metrics", "type": "Metrics"}, {"name": "metrics_log", "type": "String"}]}' sidecar.istio.io/inject: "false" labels: pipelines.kubeflow.org/cache_enabled: "true" pipelines.kubeflow.org/enable_caching: "true" pipelines.kubeflow.org/kfp_sdk_version: 1.8.11 pipelines.kubeflow.org/pipeline-sdk-type: kfp name: train outputs: artifacts: - name: mlpipeline-metrics optional: true path: /tmp/outputs/mlpipeline_metrics/data - name: train-metrics_log path: /tmp/outputs/metrics_log/data - name: train-model_out path: /tmp/outputs/model_out/data