apiVersion: kubeflow.org/v1beta1
kind: Experiment
metadata:
  finalizers:
  - update-prometheus-metrics
  generation: 1
  name: katib-kfp-test-metrics-op-test
  namespace: common
spec:
  algorithm:
    algorithmName: random
  maxFailedTrialCount: 1
  maxTrialCount: 5
  metricsCollectorSpec:
    collector:
      kind: StdOut
  objective:
    additionalMetricNames:
    - accuracy
    goal: 0.99
    metricStrategies:
    - name: val-accuracy
      value: max
    - name: accuracy
      value: max
    objectiveMetricName: val-accuracy
    type: maximize
  parallelTrialCount: 2
  parameters:
  - feasibleSpace:
      max: "0.03"
      min: "0.01"
    name: lr
    parameterType: double
  resumePolicy: LongRunning
  trialTemplate:
    failureCondition: status.[@this].#(phase=="Failed")#
    primaryContainerName: main
    primaryPodLabels:
      katib.kubeflow.org/model-training: "true"
    retain: true
    successCondition: status.[@this].#(phase=="Succeeded")#
    trialParameters:
    - description: Learning rate for the training model
      name: learningRate
      reference: lr
    trialSpec:
      apiVersion: argoproj.io/v1alpha1
      kind: Workflow
      metadata:
        annotations:
          pipelines.kubeflow.org/kfp_sdk_version: 1.8.11
          workflows.argoproj.io/pod-name-format: v1
        generation: 8
      spec:
        arguments:
          parameters:
          - name: lr
            value: ${trialParameters.learningRate}
          - name: optimizer
            value: Adam
          - name: loss
            value: categorical_crossentropy
          - name: epochs
            value: "1"
          - name: batch_size
            value: "32"
        entrypoint: test-pipeline
        podMetadata:
          labels:
            pipeline/runid: 163ced0c-cdc1-4ffc-a885-e27d936791bd
        serviceAccountName: default-editor
        templates:
        - container:
            args:
            - --trainset-flag
            - "True"
            - --data
            - /tmp/outputs/data/data
            command:
            - sh
            - -c
            - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
              'tensorflow==2.7.1' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip
              install --quiet --no-warn-script-location 'tensorflow==2.7.1' --user)
              && "$0" "$@"
            - sh
            - -ec
            - |
              program_path=$(mktemp)
              printf "%s" "$0" > "$program_path"
              python3 -u "$program_path" "$@"
            - |
              def _make_parent_dirs_and_return_path(file_path: str):
                  import os
                  os.makedirs(os.path.dirname(file_path), exist_ok=True)
                  return file_path

              def load_data(data_path, trainset_flag = True,
                           ):
                  """
                  read the train and test data
                  """
                  import pickle
                  import tensorflow as tf

                  # action for train set
                  if trainset_flag:
                      # load dataset
                      (x_train, y_train), (_, _) = tf.keras.datasets.cifar10.load_data()

                      #one-hot encode the categories
                      y_train = tf.keras.utils.to_categorical(y_train)

                      with open(data_path, 'wb') as output_file:
                          pickle.dump((x_train,y_train), output_file)

                  else:
                      # load dataset
                      (_, _), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

                      #one-hot encode the categories
                      y_test = tf.keras.utils.to_categorical(y_test)

                      with open(data_path, 'wb') as output_file:
                          pickle.dump((x_test,y_test), output_file)

              def _deserialize_bool(s) -> bool:
                  from distutils.util import strtobool
                  return strtobool(s) == 1

              import argparse
              _parser = argparse.ArgumentParser(prog='Load data', description='read the train and test data')
              _parser.add_argument("--trainset-flag", dest="trainset_flag", type=_deserialize_bool, required=False, default=argparse.SUPPRESS)
              _parser.add_argument("--data", dest="data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
              _parsed_args = vars(_parser.parse_args())

              _outputs = load_data(**_parsed_args)
            image: python:3.7
            name: ""
            resources: {}
          inputs: {}
          metadata:
            annotations:
              pipelines.kubeflow.org/arguments.parameters: '{"trainset_flag": "True"}'
              pipelines.kubeflow.org/component_ref: '{}'
              pipelines.kubeflow.org/component_spec: '{"description": "read the train
                and test data", "implementation": {"container": {"args": [{"if": {"cond":
                {"isPresent": "trainset_flag"}, "then": ["--trainset-flag", {"inputValue":
                "trainset_flag"}]}}, "--data", {"outputPath": "data"}], "command":
                ["sh", "-c", "(PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
                --quiet --no-warn-script-location ''tensorflow==2.7.1'' || PIP_DISABLE_PIP_VERSION_CHECK=1
                python3 -m pip install --quiet --no-warn-script-location ''tensorflow==2.7.1''
                --user) && \"$0\" \"$@\"", "sh", "-ec", "program_path=$(mktemp)\nprintf
                \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n",
                "def _make_parent_dirs_and_return_path(file_path: str):\n    import
                os\n    os.makedirs(os.path.dirname(file_path), exist_ok=True)\n    return
                file_path\n\ndef load_data(data_path, trainset_flag = True,\n             ):\n    \"\"\"\n    read
                the train and test data\n    \"\"\"\n    import pickle\n    import
                tensorflow as tf\n\n    # action for train set\n    if trainset_flag:\n        #
                load dataset\n        (x_train, y_train), (_, _) = tf.keras.datasets.cifar10.load_data()\n\n        #one-hot
                encode the categories\n        y_train = tf.keras.utils.to_categorical(y_train)\n\n        with
                open(data_path, ''wb'') as output_file:\n            pickle.dump((x_train,y_train),
                output_file)\n\n    else:\n        # load dataset\n        (_, _),
                (x_test, y_test) = tf.keras.datasets.cifar10.load_data()\n\n        #one-hot
                encode the categories\n        y_test = tf.keras.utils.to_categorical(y_test)\n\n        with
                open(data_path, ''wb'') as output_file:\n            pickle.dump((x_test,y_test),
                output_file)\n\ndef _deserialize_bool(s) -> bool:\n    from distutils.util
                import strtobool\n    return strtobool(s) == 1\n\nimport argparse\n_parser
                = argparse.ArgumentParser(prog=''Load data'', description=''read the
                train and test data'')\n_parser.add_argument(\"--trainset-flag\",
                dest=\"trainset_flag\", type=_deserialize_bool, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--data\",
                dest=\"data_path\", type=_make_parent_dirs_and_return_path, required=True,
                default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\n_outputs
                = load_data(**_parsed_args)\n"], "image": "python:3.7"}}, "inputs":
                [{"default": "True", "name": "trainset_flag", "optional": true, "type":
                "Boolean"}], "name": "Load data", "outputs": [{"name": "data", "type":
                "String"}]}'
              sidecar.istio.io/inject: "false"
            labels:
              pipelines.kubeflow.org/cache_enabled: "true"
              pipelines.kubeflow.org/enable_caching: "true"
              pipelines.kubeflow.org/kfp_sdk_version: 1.8.11
              pipelines.kubeflow.org/pipeline-sdk-type: kfp
          name: load-data
          outputs:
            artifacts:
            - name: load-data-data
              path: /tmp/outputs/data/data
        - container:
            command:
            - sleep 60 && cat /tmp/inputs/data/data
            image: busybox
            name: ""
            resources: {}
          inputs:
            artifacts:
            - name: train-metrics_log
              path: /tmp/inputs/data/data
          metadata:
            annotations:
              pipelines.kubeflow.org/component_ref: '{"digest": "2f525828198d8cf22d54ffc55fee6ec07c54c2c77686dcd513be3679e2a40d97"}'
              pipelines.kubeflow.org/component_spec: '{"description": "Print file
                to stdout.", "implementation": {"container": {"args": [{"inputPath":
                "data"}], "command": ["sh", "-c", "cat $0\n"], "image": "busybox"}},
                "inputs": [{"name": "data", "type": "String"}], "name": "Print file"}'
              pipelines.kubeflow.org/max_cache_staleness: P0D}
            labels:
              katib.kubeflow.org/model-training: "true"
              pipelines.kubeflow.org/cache_enabled: "false"
              pipelines.kubeflow.org/enable_caching: "false"
              pipelines.kubeflow.org/kfp_sdk_version: 1.8.11
              pipelines.kubeflow.org/pipeline-sdk-type: kfp
          name: print-file
          outputs: {}
        - container:
            args:
            - --data-raw
            - /tmp/inputs/data_raw/data
            - --val-pct
            - "0.2"
            - --trainset-flag
            - "True"
            - --data-processed
            - /tmp/outputs/data_processed/data
            command:
            - sh
            - -c
            - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
              'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
              --quiet --no-warn-script-location 'scikit-learn' --user) && "$0" "$@"
            - sh
            - -ec
            - |
              program_path=$(mktemp)
              printf "%s" "$0" > "$program_path"
              python3 -u "$program_path" "$@"
            - |
              def _make_parent_dirs_and_return_path(file_path: str):
                  import os
                  os.makedirs(os.path.dirname(file_path), exist_ok=True)
                  return file_path

              def process(data_raw_path, data_processed_path, val_pct = 0.2 , trainset_flag = True):
                  """
                  Here we do all the preprocessing
                  if the data path is for training data we:
                  (1) Normalize the data
                  (2) split the train and val data
                  If it is for unseen test data, we:
                  (1) Normalize the data
                  This function returns in any case the processed data path
                    """
                  # sklearn
                  from sklearn.model_selection import train_test_split
                  from sklearn.metrics import accuracy_score, f1_score
                  import pickle

                  with open(data_raw_path, 'rb') as f:
                      x, y = pickle.load(f)
                  if trainset_flag:

                      x_ = x/255
                      x_train, x_val, y_train, y_val = train_test_split(x_, y, test_size = val_pct, stratify=y, random_state = 42)

                      with open(data_processed_path, 'wb') as output_file:
                          pickle.dump((x_train,y_train, x_val,y_val), output_file)

                  else:
                      x_ = x/255
                      with open(data_processed_path, 'wb') as output_file:
                          pickle.dump((x_,y), output_file)

              def _deserialize_bool(s) -> bool:
                  from distutils.util import strtobool
                  return strtobool(s) == 1

              import argparse
              _parser = argparse.ArgumentParser(prog='Process', description='Here we do all the preprocessing')
              _parser.add_argument("--data-raw", dest="data_raw_path", type=str, required=True, default=argparse.SUPPRESS)
              _parser.add_argument("--val-pct", dest="val_pct", type=float, required=False, default=argparse.SUPPRESS)
              _parser.add_argument("--trainset-flag", dest="trainset_flag", type=_deserialize_bool, required=False, default=argparse.SUPPRESS)
              _parser.add_argument("--data-processed", dest="data_processed_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
              _parsed_args = vars(_parser.parse_args())

              _outputs = process(**_parsed_args)
            image: tensorflow/tensorflow:2.7.1
            name: ""
            resources: {}
          inputs:
            artifacts:
            - name: load-data-data
              path: /tmp/inputs/data_raw/data
          metadata:
            annotations:
              pipelines.kubeflow.org/arguments.parameters: '{"trainset_flag": "True",
                "val_pct": "0.2"}'
              pipelines.kubeflow.org/component_ref: '{}'
              pipelines.kubeflow.org/component_spec: '{"description": "Here we do
                all the preprocessing", "implementation": {"container": {"args": ["--data-raw",
                {"inputPath": "data_raw"}, {"if": {"cond": {"isPresent": "val_pct"},
                "then": ["--val-pct", {"inputValue": "val_pct"}]}}, {"if": {"cond":
                {"isPresent": "trainset_flag"}, "then": ["--trainset-flag", {"inputValue":
                "trainset_flag"}]}}, "--data-processed", {"outputPath": "data_processed"}],
                "command": ["sh", "-c", "(PIP_DISABLE_PIP_VERSION_CHECK=1 python3
                -m pip install --quiet --no-warn-script-location ''scikit-learn''
                || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
                --no-warn-script-location ''scikit-learn'' --user) && \"$0\" \"$@\"",
                "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3
                -u \"$program_path\" \"$@\"\n", "def _make_parent_dirs_and_return_path(file_path:
                str):\n    import os\n    os.makedirs(os.path.dirname(file_path),
                exist_ok=True)\n    return file_path\n\ndef process(data_raw_path,
                data_processed_path, val_pct = 0.2 , trainset_flag = True):\n    \"\"\"\n    Here
                we do all the preprocessing\n    if the data path is for training
                data we:\n    (1) Normalize the data\n    (2) split the train and
                val data\n    If it is for unseen test data, we:\n    (1) Normalize
                the data\n    This function returns in any case the processed data
                path\n      \"\"\"\n    # sklearn\n    from sklearn.model_selection
                import train_test_split\n    from sklearn.metrics import accuracy_score,
                f1_score\n    import pickle\n\n    with open(data_raw_path, ''rb'')
                as f:\n        x, y = pickle.load(f)\n    if trainset_flag:\n\n        x_
                = x/255\n        x_train, x_val, y_train, y_val = train_test_split(x_,
                y, test_size = val_pct, stratify=y, random_state = 42)\n\n        with
                open(data_processed_path, ''wb'') as output_file:\n            pickle.dump((x_train,y_train,
                x_val,y_val), output_file)\n\n    else:\n        x_ = x/255\n        with
                open(data_processed_path, ''wb'') as output_file:\n            pickle.dump((x_,y),
                output_file)\n\ndef _deserialize_bool(s) -> bool:\n    from distutils.util
                import strtobool\n    return strtobool(s) == 1\n\nimport argparse\n_parser
                = argparse.ArgumentParser(prog=''Process'', description=''Here we
                do all the preprocessing'')\n_parser.add_argument(\"--data-raw\",
                dest=\"data_raw_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--val-pct\",
                dest=\"val_pct\", type=float, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--trainset-flag\",
                dest=\"trainset_flag\", type=_deserialize_bool, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--data-processed\",
                dest=\"data_processed_path\", type=_make_parent_dirs_and_return_path,
                required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\n_outputs
                = process(**_parsed_args)\n"], "image": "tensorflow/tensorflow:2.7.1"}},
                "inputs": [{"name": "data_raw", "type": "String"}, {"default": "0.2",
                "name": "val_pct", "optional": true, "type": "Float"}, {"default":
                "True", "name": "trainset_flag", "optional": true, "type": "Boolean"}],
                "name": "Process", "outputs": [{"name": "data_processed", "type":
                "String"}]}'
              sidecar.istio.io/inject: "false"
            labels:
              pipelines.kubeflow.org/cache_enabled: "true"
              pipelines.kubeflow.org/enable_caching: "true"
              pipelines.kubeflow.org/kfp_sdk_version: 1.8.11
              pipelines.kubeflow.org/pipeline-sdk-type: kfp
          name: process
          outputs:
            artifacts:
            - name: process-data_processed
              path: /tmp/outputs/data_processed/data
        - dag:
            tasks:
            - arguments: {}
              name: load-data
              template: load-data
            - arguments:
                artifacts:
                - from: '{{tasks.train.outputs.artifacts.train-metrics_log}}'
                  name: train-metrics_log
              dependencies:
              - train
              name: print-file
              template: print-file
            - arguments:
                artifacts:
                - from: '{{tasks.load-data.outputs.artifacts.load-data-data}}'
                  name: load-data-data
              dependencies:
              - load-data
              name: process
              template: process
            - arguments:
                artifacts:
                - from: '{{tasks.process.outputs.artifacts.process-data_processed}}'
                  name: process-data_processed
                parameters:
                - name: batch_size
                  value: '{{inputs.parameters.batch_size}}'
                - name: epochs
                  value: '{{inputs.parameters.epochs}}'
                - name: lr
                  value: '{{inputs.parameters.lr}}'
                - name: optimizer
                  value: '{{inputs.parameters.optimizer}}'
              dependencies:
              - process
              name: train
              template: train
          inputs:
            parameters:
            - name: batch_size
            - name: epochs
            - name: lr
            - name: optimizer
          metadata:
            annotations:
              sidecar.istio.io/inject: "false"
            labels:
              pipelines.kubeflow.org/cache_enabled: "true"
          name: test-pipeline
          outputs: {}
        - container:
            args:
            - --data-train
            - /tmp/inputs/data_train/data
            - --lr
            - '{{inputs.parameters.lr}}'
            - --optimizer
            - '{{inputs.parameters.optimizer}}'
            - --loss
            - categorical_crossentropy
            - --epochs
            - '{{inputs.parameters.epochs}}'
            - --batch-size
            - '{{inputs.parameters.batch_size}}'
            - --model-out
            - /tmp/outputs/model_out/data
            - --mlpipeline-metrics
            - /tmp/outputs/mlpipeline_metrics/data
            - --metrics-log
            - /tmp/outputs/metrics_log/data
            command:
            - sh
            - -ec
            - |
              program_path=$(mktemp)
              printf "%s" "$0" > "$program_path"
              python3 -u "$program_path" "$@"
            - |
              def _make_parent_dirs_and_return_path(file_path: str):
                  import os
                  os.makedirs(os.path.dirname(file_path), exist_ok=True)
                  return file_path

              def train(data_train_path, model_out_path, mlpipeline_metrics_path,
                        metrics_log_path, lr=1e-4,
                        optimizer="Adam", loss="categorical_crossentropy", epochs=1,
                        batch_size=32):
                  """
                  This is the simulated train part of our ML pipeline where training is performed
                  """

                  import tensorflow as tf
                  import pickle
                  from tensorflow.keras.preprocessing.image import ImageDataGenerator
                  from pathlib import Path
                  import json

                  with open(data_train_path, 'rb') as f:
                      x_train,y_train, x_val,y_val = pickle.load(f)

                  model = tf.keras.Sequential([
                      tf.keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(32, 32, 3)),
                      tf.keras.layers.MaxPooling2D(2, 2),
                      tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
                      tf.keras.layers.MaxPooling2D(2, 2),
                      tf.keras.layers.Flatten(),
                      tf.keras.layers.Dense(128, activation="relu"),
                      tf.keras.layers.Dense(10, activation="softmax")
                      ])

                  if optimizer.lower() == "sgd":
                      optimizer = tf.keras.optimizers.SGD(lr)
                  else:
                      optimizer = tf.keras.optimizers.Adam(lr)

                  model.compile(loss=loss,
                            optimizer= optimizer,
                            metrics=['accuracy'])

                  # fit the model
                  model_early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10, verbose=1, restore_best_weights=False)

                  train_datagen = ImageDataGenerator(
                      horizontal_flip=True
                  )

                  validation_datagen = ImageDataGenerator()
                  history = model.fit(train_datagen.flow(x_train, y_train, batch_size=batch_size),
                        epochs=epochs,
                        validation_data=validation_datagen.flow(x_val, y_val, batch_size=batch_size),
                        shuffle=False,
                        callbacks=[model_early_stopping_callback])
                  #model_out.metadata.update(**history.history)
                  model.save(model_out_path, save_format="tf" )
                  # Log accuracz

                  metrics = {
                  'metrics': [{
                    'name': 'accuracy', # The name of the metric. Visualized as the column name in the runs table.
                    'numberValue':  history.history['accuracy'][0], # The value of the metric. Must be a numeric value.
                    'format': "PERCENTAGE",   # The optional format of the metric. Supported values are "RAW" (displayed in raw format) and "PERCENTAGE" (displayed in percentage format).
                  },
                  {
                    'name': 'val-accuracy', # The name of the metric. Visualized as the column name in the runs table.
                    'numberValue':  history.history['val_accuracy'][0], # The value of the metric. Must be a numeric value.
                    'format': "PERCENTAGE",   # The optional format of the metric. Supported values are "RAW" (displayed in raw format) and "PERCENTAGE" (displayed in percentage format).
                  }]
                  }
                  #fn_log = Path('/var/log/katib/metrics.log')
                  #fn_log.parent.mkdir(parents=False, exist_ok=False)
                  with open(metrics_log_path, 'w') as f:
                      for m in metrics['metrics']:
                          print(f"{m['name']}={m['numberValue']}")
                          f.write(f"{m['name']}={m['numberValue']}\n")

                  with open(mlpipeline_metrics_path, 'w') as f:
                      json.dump(metrics, f)

              import argparse
              _parser = argparse.ArgumentParser(prog='Train', description='This is the simulated train part of our ML pipeline where training is performed')
              _parser.add_argument("--data-train", dest="data_train_path", type=str, required=True, default=argparse.SUPPRESS)
              _parser.add_argument("--lr", dest="lr", type=float, required=False, default=argparse.SUPPRESS)
              _parser.add_argument("--optimizer", dest="optimizer", type=str, required=False, default=argparse.SUPPRESS)
              _parser.add_argument("--loss", dest="loss", type=str, required=False, default=argparse.SUPPRESS)
              _parser.add_argument("--epochs", dest="epochs", type=int, required=False, default=argparse.SUPPRESS)
              _parser.add_argument("--batch-size", dest="batch_size", type=int, required=False, default=argparse.SUPPRESS)
              _parser.add_argument("--model-out", dest="model_out_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
              _parser.add_argument("--mlpipeline-metrics", dest="mlpipeline_metrics_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
              _parser.add_argument("--metrics-log", dest="metrics_log_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
              _parsed_args = vars(_parser.parse_args())

              _outputs = train(**_parsed_args)
            image: jupyter/tensorflow-notebook:tensorflow-2.9.1
            name: ""
            resources: {}
          inputs:
            artifacts:
            - name: process-data_processed
              path: /tmp/inputs/data_train/data
            parameters:
            - name: batch_size
            - name: epochs
            - name: lr
            - name: optimizer
          metadata:
            annotations:
              pipelines.kubeflow.org/arguments.parameters: '{"batch_size": "{{inputs.parameters.batch_size}}",
                "epochs": "{{inputs.parameters.epochs}}", "loss": "categorical_crossentropy",
                "lr": "{{inputs.parameters.lr}}", "optimizer": "{{inputs.parameters.optimizer}}"}'
              pipelines.kubeflow.org/component_ref: '{}'
              pipelines.kubeflow.org/component_spec: '{"description": "This is the
                simulated train part of our ML pipeline where training is performed",
                "implementation": {"container": {"args": ["--data-train", {"inputPath":
                "data_train"}, {"if": {"cond": {"isPresent": "lr"}, "then": ["--lr",
                {"inputValue": "lr"}]}}, {"if": {"cond": {"isPresent": "optimizer"},
                "then": ["--optimizer", {"inputValue": "optimizer"}]}}, {"if": {"cond":
                {"isPresent": "loss"}, "then": ["--loss", {"inputValue": "loss"}]}},
                {"if": {"cond": {"isPresent": "epochs"}, "then": ["--epochs", {"inputValue":
                "epochs"}]}}, {"if": {"cond": {"isPresent": "batch_size"}, "then":
                ["--batch-size", {"inputValue": "batch_size"}]}}, "--model-out", {"outputPath":
                "model_out"}, "--mlpipeline-metrics", {"outputPath": "mlpipeline_metrics"},
                "--metrics-log", {"outputPath": "metrics_log"}], "command": ["sh",
                "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3
                -u \"$program_path\" \"$@\"\n", "def _make_parent_dirs_and_return_path(file_path:
                str):\n    import os\n    os.makedirs(os.path.dirname(file_path),
                exist_ok=True)\n    return file_path\n\ndef train(data_train_path,
                model_out_path, mlpipeline_metrics_path,\n          metrics_log_path,
                lr=1e-4,\n          optimizer=\"Adam\", loss=\"categorical_crossentropy\",
                epochs=1,\n          batch_size=32):\n    \"\"\"\n    This is the
                simulated train part of our ML pipeline where training is performed\n    \"\"\"\n\n    import
                tensorflow as tf\n    import pickle\n    from tensorflow.keras.preprocessing.image
                import ImageDataGenerator\n    from pathlib import Path\n    import
                json\n\n    with open(data_train_path, ''rb'') as f:\n        x_train,y_train,
                x_val,y_val = pickle.load(f)\n\n    model = tf.keras.Sequential([\n        tf.keras.layers.Conv2D(64,
                (3, 3), activation=''relu'', input_shape=(32, 32, 3)),\n        tf.keras.layers.MaxPooling2D(2,
                2),\n        tf.keras.layers.Conv2D(64, (3, 3), activation=''relu''),\n        tf.keras.layers.MaxPooling2D(2,
                2),\n        tf.keras.layers.Flatten(),\n        tf.keras.layers.Dense(128,
                activation=\"relu\"),\n        tf.keras.layers.Dense(10, activation=\"softmax\")\n        ])\n\n    if
                optimizer.lower() == \"sgd\":\n        optimizer = tf.keras.optimizers.SGD(lr)\n    else:\n        optimizer
                = tf.keras.optimizers.Adam(lr)\n\n    model.compile(loss=loss,\n              optimizer=
                optimizer,\n              metrics=[''accuracy''])\n\n    # fit the
                model\n    model_early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor=''val_accuracy'',
                patience=10, verbose=1, restore_best_weights=False)\n\n    train_datagen
                = ImageDataGenerator(\n        horizontal_flip=True\n    )\n\n    validation_datagen
                = ImageDataGenerator()\n    history = model.fit(train_datagen.flow(x_train,
                y_train, batch_size=batch_size),\n          epochs=epochs,\n          validation_data=validation_datagen.flow(x_val,
                y_val, batch_size=batch_size),\n          shuffle=False,\n          callbacks=[model_early_stopping_callback])\n    #model_out.metadata.update(**history.history)\n    model.save(model_out_path,
                save_format=\"tf\" )\n    # Log accuracz\n\n    metrics = {\n    ''metrics'':
                [{\n      ''name'': ''accuracy'', # The name of the metric. Visualized
                as the column name in the runs table.\n      ''numberValue'':  history.history[''accuracy''][0],
                # The value of the metric. Must be a numeric value.\n      ''format'':
                \"PERCENTAGE\",   # The optional format of the metric. Supported values
                are \"RAW\" (displayed in raw format) and \"PERCENTAGE\" (displayed
                in percentage format).\n    },\n    {\n      ''name'': ''val-accuracy'',
                # The name of the metric. Visualized as the column name in the runs
                table.\n      ''numberValue'':  history.history[''val_accuracy''][0],
                # The value of the metric. Must be a numeric value.\n      ''format'':
                \"PERCENTAGE\",   # The optional format of the metric. Supported values
                are \"RAW\" (displayed in raw format) and \"PERCENTAGE\" (displayed
                in percentage format).\n    }]\n    }\n    #fn_log = Path(''/var/log/katib/metrics.log'')\n    #fn_log.parent.mkdir(parents=False,
                exist_ok=False)\n    with open(metrics_log_path, ''w'') as f:\n        for
                m in metrics[''metrics'']:\n            print(f\"{m[''name'']}={m[''numberValue'']}\")\n            f.write(f\"{m[''name'']}={m[''numberValue'']}\\n\")\n\n    with
                open(mlpipeline_metrics_path, ''w'') as f:\n        json.dump(metrics,
                f)\n\nimport argparse\n_parser = argparse.ArgumentParser(prog=''Train'',
                description=''This is the simulated train part of our ML pipeline
                where training is performed'')\n_parser.add_argument(\"--data-train\",
                dest=\"data_train_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--lr\",
                dest=\"lr\", type=float, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--optimizer\",
                dest=\"optimizer\", type=str, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--loss\",
                dest=\"loss\", type=str, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--epochs\",
                dest=\"epochs\", type=int, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--batch-size\",
                dest=\"batch_size\", type=int, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--model-out\",
                dest=\"model_out_path\", type=_make_parent_dirs_and_return_path, required=True,
                default=argparse.SUPPRESS)\n_parser.add_argument(\"--mlpipeline-metrics\",
                dest=\"mlpipeline_metrics_path\", type=_make_parent_dirs_and_return_path,
                required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--metrics-log\",
                dest=\"metrics_log_path\", type=_make_parent_dirs_and_return_path,
                required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\n_outputs
                = train(**_parsed_args)\n"], "image": "jupyter/tensorflow-notebook:tensorflow-2.9.1"}},
                "inputs": [{"name": "data_train", "type": "String"}, {"default": "0.0001",
                "name": "lr", "optional": true, "type": "Float"}, {"default": "Adam",
                "name": "optimizer", "optional": true, "type": "String"}, {"default":
                "categorical_crossentropy", "name": "loss", "optional": true, "type":
                "String"}, {"default": "1", "name": "epochs", "optional": true, "type":
                "Integer"}, {"default": "32", "name": "batch_size", "optional": true,
                "type": "Integer"}], "name": "Train", "outputs": [{"name": "model_out",
                "type": "String"}, {"name": "mlpipeline_metrics", "type": "Metrics"},
                {"name": "metrics_log", "type": "String"}]}'
              sidecar.istio.io/inject: "false"
            labels:
              pipelines.kubeflow.org/cache_enabled: "true"
              pipelines.kubeflow.org/enable_caching: "true"
              pipelines.kubeflow.org/kfp_sdk_version: 1.8.11
              pipelines.kubeflow.org/pipeline-sdk-type: kfp
          name: train
          outputs:
            artifacts:
            - name: mlpipeline-metrics
              optional: true
              path: /tmp/outputs/mlpipeline_metrics/data
            - name: train-metrics_log
              path: /tmp/outputs/metrics_log/data
            - name: train-model_out
              path: /tmp/outputs/model_out/data